hgbook
diff tools/po4a/lib/Locale/Po4a/Xml.pm @ 623:082bb76417f1
Add Po4a 0.37-dev(2009-03-08)
author | Dongsheng Song <dongsheng.song@gmail.com> |
---|---|
date | Thu Mar 12 15:43:56 2009 +0800 (2009-03-12) |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/po4a/lib/Locale/Po4a/Xml.pm Thu Mar 12 15:43:56 2009 +0800 1.3 @@ -0,0 +1,1973 @@ 1.4 +#!/usr/bin/perl 1.5 + 1.6 +# Po4a::Xml.pm 1.7 +# 1.8 +# extract and translate translatable strings from XML documents. 1.9 +# 1.10 +# This code extracts plain text from tags and attributes from generic 1.11 +# XML documents, and it can be used as a base to build modules for 1.12 +# XML-based documents. 1.13 +# 1.14 +# Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> 1.15 +# Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net> 1.16 +# 1.17 +# This program is free software; you can redistribute it and/or modify 1.18 +# it under the terms of the GNU General Public License as published by 1.19 +# the Free Software Foundation; either version 2 of the License, or 1.20 +# (at your option) any later version. 1.21 +# 1.22 +# This program is distributed in the hope that it will be useful, 1.23 +# but WITHOUT ANY WARRANTY; without even the implied warranty of 1.24 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1.25 +# GNU General Public License for more details. 1.26 +# 1.27 +# You should have received a copy of the GNU General Public License 1.28 +# along with this program; if not, write to the Free Software 1.29 +# Foundation, Inc., 1.30 +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 1.31 +# 1.32 +######################################################################## 1.33 + 1.34 +=head1 NAME 1.35 + 1.36 +Locale::Po4a::Xml - Convert XML documents and derivates from/to PO files 1.37 + 1.38 +=head1 DESCRIPTION 1.39 + 1.40 +The po4a (po for anything) project goal is to ease translations (and more 1.41 +interestingly, the maintenance of translations) using gettext tools on 1.42 +areas where they were not expected like documentation. 1.43 + 1.44 +Locale::Po4a::Xml is a module to help the translation of XML documents into 1.45 +other [human] languages. It can also be used as a base to build modules for 1.46 +XML-based documents. 1.47 + 1.48 +=cut 1.49 + 1.50 +package Locale::Po4a::Xml; 1.51 + 1.52 +use 5.006; 1.53 +use strict; 1.54 +use warnings; 1.55 + 1.56 +require Exporter; 1.57 +use vars qw(@ISA @EXPORT); 1.58 +@ISA = qw(Locale::Po4a::TransTractor); 1.59 +@EXPORT = qw(new initialize @tag_types); 1.60 + 1.61 +use Locale::Po4a::TransTractor; 1.62 +use Locale::Po4a::Common; 1.63 +use Carp qw(croak); 1.64 +use File::Basename; 1.65 +use File::Spec; 1.66 + 1.67 +#It will mantain the path from the root tag to the current one 1.68 +my @path; 1.69 + 1.70 +#It will contain a list of external entities and their attached paths 1.71 +my %entities; 1.72 + 1.73 +my @comments; 1.74 + 1.75 +sub shiftline { 1.76 + my $self = shift; 1.77 + # call Transtractor's shiftline 1.78 + my ($line,$ref) = $self->SUPER::shiftline(); 1.79 + return ($line,$ref) if (not defined $line); 1.80 + 1.81 + for my $k (keys %entities) { 1.82 + if ($line =~ m/^(.*?)&$k;(.*)$/s) { 1.83 + my ($before, $after) = ($1, $2); 1.84 + my $linenum=0; 1.85 + my @textentries; 1.86 + 1.87 + open (my $in, $entities{$k}) 1.88 + or croak wrap_mod("po4a::xml", 1.89 + dgettext("po4a", "Can't read from %s: %s"), 1.90 + $entities{$k}, $!); 1.91 + while (defined (my $textline = <$in>)) { 1.92 + $linenum++; 1.93 + my $textref=$entities{$k}.":$linenum"; 1.94 + push @textentries, ($textline,$textref); 1.95 + } 1.96 + close $in 1.97 + or croak wrap_mod("po4a::xml", 1.98 + dgettext("po4a", "Can't close %s after reading: %s"), 1.99 + $entities{$k}, $!); 1.100 + 1.101 + push @textentries, ($after, $ref); 1.102 + $line = $before.(shift @textentries); 1.103 + $ref .= " ".(shift @textentries); 1.104 + $self->unshiftline(@textentries); 1.105 + } 1.106 + } 1.107 + 1.108 + return ($line,$ref); 1.109 +} 1.110 + 1.111 +sub read { 1.112 + my ($self,$filename)=@_; 1.113 + push @{$self->{DOCPOD}{infile}}, $filename; 1.114 + $self->Locale::Po4a::TransTractor::read($filename); 1.115 +} 1.116 + 1.117 +sub parse { 1.118 + my $self=shift; 1.119 + map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}}; 1.120 +} 1.121 + 1.122 +# @save_holders is a stack of references to ('paragraph', 'translation', 1.123 +# 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where: 1.124 +# paragraph is a reference to an array (see paragraph in the 1.125 +# treat_content() subroutine) of strings followed by 1.126 +# references. It contains the @paragraph array as it was 1.127 +# before the processing was interrupted by a tag instroducing 1.128 +# a placeholder. 1.129 +# translation is the translation of this level up to now 1.130 +# sub_translations is a reference to an array of strings containing the 1.131 +# translations which must replace the placeholders. 1.132 +# open is the tag which opened the placeholder. 1.133 +# close is the tag which closed the placeholder. 1.134 +# folded_attributes is an hash of tags with their attributes (<tag attrs=...> 1.135 +# strings), referenced by the folded tag id, which should 1.136 +# replace the <tag po4a-id=id> strings in the current 1.137 +# translation. 1.138 +# 1.139 +# If @save_holders only has 1 holder, then we are not processing the 1.140 +# content of an holder, we are translating the document. 1.141 +my @save_holders; 1.142 + 1.143 + 1.144 +# If we are at the bottom of the stack and there is no <placeholder ...> in 1.145 +# the current translation, we can push the translation in the translated 1.146 +# document. 1.147 +# Otherwise, we keep the translation in the current holder. 1.148 +sub pushline { 1.149 + my ($self, $line) = (shift, shift); 1.150 + 1.151 + my $holder = $save_holders[$#save_holders]; 1.152 + my $translation = $holder->{'translation'}; 1.153 + $translation .= $line; 1.154 + 1.155 + while ( %{$holder->{folded_attributes}} 1.156 + and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s) { 1.157 + my $begin = $1; 1.158 + my $tag = $2; 1.159 + my $id = $3; 1.160 + my $end = $4; 1.161 + if (defined $holder->{folded_attributes}->{$id}) { 1.162 + # TODO: check if the tag is the same 1.163 + $translation = $begin.$holder->{folded_attributes}->{$id}.$end; 1.164 + delete $holder->{folded_attributes}->{$id}; 1.165 + } else { 1.166 + # TODO: It will be hard to identify the location. 1.167 + # => find a way to retrieve the reference. 1.168 + die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id); 1.169 + } 1.170 + } 1.171 +# TODO: check that %folded_attributes is empty at some time 1.172 +# => in translate_paragraph? 1.173 + 1.174 + if ( ($#save_holders > 0) 1.175 + or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) { 1.176 + $holder->{'translation'} = $translation; 1.177 + } else { 1.178 + $self->SUPER::pushline($translation); 1.179 + $holder->{'translation'} = ''; 1.180 + } 1.181 +} 1.182 + 1.183 +=head1 TRANSLATING WITH PO4A::XML 1.184 + 1.185 +This module can be used directly to handle generic XML documents. This will 1.186 +extract all tag's content, and no attributes, since it's where the text is 1.187 +written in most XML based documents. 1.188 + 1.189 +There are some options (described in the next section) that can customize 1.190 +this behavior. If this doesn't fit to your document format you're encouraged 1.191 +to write your own module derived from this, to describe your format's details. 1.192 +See the section "Writing derivate modules" below, for the process description. 1.193 + 1.194 +=cut 1.195 + 1.196 +# 1.197 +# Parse file and translate it 1.198 +# 1.199 +sub parse_file { 1.200 + my ($self,$filename) = @_; 1.201 + my $eof = 0; 1.202 + 1.203 + while (!$eof) { 1.204 + # We get all the text until the next breaking tag (not 1.205 + # inline) and translate it 1.206 + $eof = $self->treat_content; 1.207 + if (!$eof) { 1.208 + # And then we treat the following breaking tag 1.209 + $eof = $self->treat_tag; 1.210 + } 1.211 + } 1.212 +} 1.213 + 1.214 +=head1 OPTIONS ACCEPTED BY THIS MODULE 1.215 + 1.216 +The global debug option causes this module to show the excluded strings, in 1.217 +order to see if it skips something important. 1.218 + 1.219 +These are this module's particular options: 1.220 + 1.221 +=over 4 1.222 + 1.223 +=item B<nostrip> 1.224 + 1.225 +Prevents it to strip the spaces around the extracted strings. 1.226 + 1.227 +=item B<wrap> 1.228 + 1.229 +Canonizes the string to translate, considering that whitespaces are not 1.230 +important, and wraps the translated document. This option can be overridden 1.231 +by custom tag options. See the "tags" option below. 1.232 + 1.233 +=item B<caseinsensitive> 1.234 + 1.235 +It makes the tags and attributes searching to work in a case insensitive 1.236 +way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang. 1.237 + 1.238 +=item B<includeexternal> 1.239 + 1.240 +When defined, external entities are included in the generated (translated) 1.241 +document, and for the extraction of strings. If it's not defined, you 1.242 +will have to translate external entities separately as independent 1.243 +documents. 1.244 + 1.245 +=item B<ontagerror> 1.246 + 1.247 +This option defines the behavior of the module when it encounter a invalid 1.248 +Xml syntax (a closing tag which does not match the last opening tag, or a 1.249 +tag's attribute without value). 1.250 +It can take the following values: 1.251 + 1.252 +=over 1.253 + 1.254 +=item I<fail> 1.255 + 1.256 +This is the default value. 1.257 +The module will exit with an error. 1.258 + 1.259 +=item I<warn> 1.260 + 1.261 +The module will continue, and will issue a warning. 1.262 + 1.263 +=item I<silent> 1.264 + 1.265 +The module will continue without any warnings. 1.266 + 1.267 +=back 1.268 + 1.269 +Be careful when using this option. 1.270 +It is generally recommended to fix the input file. 1.271 + 1.272 +=item B<tagsonly> 1.273 + 1.274 +Extracts only the specified tags in the "tags" option. Otherwise, it 1.275 +will extract all the tags except the ones specified. 1.276 + 1.277 +Note: This option is deprecated. 1.278 + 1.279 +=item B<doctype> 1.280 + 1.281 +String that will try to match with the first line of the document's doctype 1.282 +(if defined). If it doesn't, a warning will indicate that the document 1.283 +might be of a bad type. 1.284 + 1.285 +=item B<tags> 1.286 + 1.287 +Space-separated list of tags you want to translate or skip. By default, 1.288 +the specified tags will be excluded, but if you use the "tagsonly" option, 1.289 +the specified tags will be the only ones included. The tags must be in the 1.290 +form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of 1.291 +the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag. 1.292 + 1.293 +You can also specify some tag options putting some characters in front of 1.294 +the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap) 1.295 +to override the default behavior specified by the global "wrap" option. 1.296 + 1.297 +Example: WE<lt>chapterE<gt>E<lt>titleE<gt> 1.298 + 1.299 +Note: This option is deprecated. 1.300 +You should use the B<translated> and B<untranslated> options instead. 1.301 + 1.302 +=item B<attributes> 1.303 + 1.304 +Space-separated list of tag's attributes you want to translate. You can 1.305 +specify the attributes by their name (for example, "lang"), but you can 1.306 +prefix it with a tag hierarchy, to specify that this attribute will only be 1.307 +translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang 1.308 +specifies that the lang attribute will only be translated if it's into an 1.309 +E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag. 1.310 + 1.311 +=item B<foldattributes> 1.312 + 1.313 +Do not translate attributes in inline tags. 1.314 +Instead, replace all attributes of a tag by po4a-id=<id>. 1.315 + 1.316 +This is useful when attributes shall not be translated, as this simplifies the 1.317 +strings for translators, and avoids typos. 1.318 + 1.319 +=item B<break> 1.320 + 1.321 +Space-separated list of tags which should break the sequence. 1.322 +By default, all tags break the sequence. 1.323 + 1.324 +The tags must be in the form <aaa>, but you can join some 1.325 +(<bbb><aaa>), if a tag (<aaa>) should only be considered 1.326 +when it's into another tag (<bbb>). 1.327 + 1.328 +=item B<inline> 1.329 + 1.330 +Space-separated list of tags which should be treated as inline. 1.331 +By default, all tags break the sequence. 1.332 + 1.333 +The tags must be in the form <aaa>, but you can join some 1.334 +(<bbb><aaa>), if a tag (<aaa>) should only be considered 1.335 +when it's into another tag (<bbb>). 1.336 + 1.337 +=item B<placeholder> 1.338 + 1.339 +Space-separated list of tags which should be treated as placeholders. 1.340 +Placeholders do not break the sequence, but the content of placeholders is 1.341 +translated separately. 1.342 + 1.343 +The location of the placeholder in its blocks will be marked with a string 1.344 +similar to: 1.345 + 1.346 + <placeholder type=\"footnote\" id=\"0\"/> 1.347 + 1.348 +The tags must be in the form <aaa>, but you can join some 1.349 +(<bbb><aaa>), if a tag (<aaa>) should only be considered 1.350 +when it's into another tag (<bbb>). 1.351 + 1.352 +=item B<nodefault> 1.353 + 1.354 +Space separated list of tags that the module should not try to set by 1.355 +default in any category. 1.356 + 1.357 +=item B<cpp> 1.358 + 1.359 +Support C preprocessor directives. 1.360 +When this option is set, po4a will consider preprocessor directives as 1.361 +paragraph separators. 1.362 +This is important if the XML file must be preprocessed because otherwise 1.363 +the directives may be inserted in the middle of lines if po4a consider it 1.364 +belong to the current paragraph, and they won't be recognized by the 1.365 +preprocessor. 1.366 +Note: the preprocessor directives must only appear between tags 1.367 +(they must not break a tag). 1.368 + 1.369 +=item B<translated> 1.370 + 1.371 +Space-separated list of tags you want to translate. 1.372 + 1.373 +The tags must be in the form <aaa>, but you can join some 1.374 +(<bbb><aaa>), if a tag (<aaa>) should only be considered 1.375 +when it's into another tag (<bbb>). 1.376 + 1.377 +You can also specify some tag options putting some characters in front of 1.378 +the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap) 1.379 +to overide the default behavior specified by the global "wrap" option. 1.380 + 1.381 +Example: WE<lt>chapterE<gt>E<lt>titleE<gt> 1.382 + 1.383 +=item B<untranslated> 1.384 + 1.385 +Space-separated list of tags you do not want to translate. 1.386 + 1.387 +The tags must be in the form <aaa>, but you can join some 1.388 +(<bbb><aaa>), if a tag (<aaa>) should only be considered 1.389 +when it's into another tag (<bbb>). 1.390 + 1.391 +=item B<defaulttranslateoption> 1.392 + 1.393 +The default categories for tags that are not in any of the translated, 1.394 +untranslated, break, inline, or placeholder. 1.395 + 1.396 +This is a set of letters: 1.397 + 1.398 +=over 1.399 + 1.400 +=item I<w> 1.401 + 1.402 +Tags should be translated and content can be re-wrapped. 1.403 + 1.404 +=item I<W> 1.405 + 1.406 +Tags should be translated and content should not be re-wrapped. 1.407 + 1.408 +=item I<i> 1.409 + 1.410 +Tags should be translated inline. 1.411 + 1.412 +=item I<p> 1.413 + 1.414 +Tags should be translated as placeholders. 1.415 + 1.416 +=back 1.417 + 1.418 +=back 1.419 + 1.420 +=cut 1.421 +# TODO: defaulttranslateoption 1.422 +# w => indicate that it is only valid for translatable tags and do not 1.423 +# care about inline/break/placeholder? 1.424 +# ... 1.425 + 1.426 +sub initialize { 1.427 + my $self = shift; 1.428 + my %options = @_; 1.429 + 1.430 + # Reset the path 1.431 + @path = (); 1.432 + 1.433 + # Initialize the stack of holders 1.434 + my @paragraph = (); 1.435 + my @sub_translations = (); 1.436 + my %folded_attributes; 1.437 + my %holder = ('paragraph' => \@paragraph, 1.438 + 'translation' => "", 1.439 + 'sub_translations' => \@sub_translations, 1.440 + 'folded_attributes' => \%folded_attributes); 1.441 + @save_holders = (\%holder); 1.442 + 1.443 + $self->{options}{'nostrip'}=0; 1.444 + $self->{options}{'wrap'}=0; 1.445 + $self->{options}{'caseinsensitive'}=0; 1.446 + $self->{options}{'tagsonly'}=0; 1.447 + $self->{options}{'tags'}=''; 1.448 + $self->{options}{'break'}=''; 1.449 + $self->{options}{'translated'}=''; 1.450 + $self->{options}{'untranslated'}=''; 1.451 + $self->{options}{'defaulttranslateoption'}=''; 1.452 + $self->{options}{'attributes'}=''; 1.453 + $self->{options}{'foldattributes'}=0; 1.454 + $self->{options}{'inline'}=''; 1.455 + $self->{options}{'placeholder'}=''; 1.456 + $self->{options}{'doctype'}=''; 1.457 + $self->{options}{'nodefault'}=''; 1.458 + $self->{options}{'includeexternal'}=0; 1.459 + $self->{options}{'ontagerror'}="fail"; 1.460 + $self->{options}{'cpp'}=0; 1.461 + 1.462 + $self->{options}{'verbose'}=''; 1.463 + $self->{options}{'debug'}=''; 1.464 + 1.465 + foreach my $opt (keys %options) { 1.466 + if ($options{$opt}) { 1.467 + die wrap_mod("po4a::xml", 1.468 + dgettext("po4a", "Unknown option: %s"), $opt) 1.469 + unless exists $self->{options}{$opt}; 1.470 + $self->{options}{$opt} = $options{$opt}; 1.471 + } 1.472 + } 1.473 + # Default options set by modules. Forbidden for users. 1.474 + $self->{options}{'_default_translated'}=''; 1.475 + $self->{options}{'_default_untranslated'}=''; 1.476 + $self->{options}{'_default_break'}=''; 1.477 + $self->{options}{'_default_inline'}=''; 1.478 + $self->{options}{'_default_placeholder'}=''; 1.479 + $self->{options}{'_default_attributes'}=''; 1.480 + 1.481 + #It will maintain the list of the translatable tags 1.482 + $self->{tags}=(); 1.483 + $self->{translated}=(); 1.484 + $self->{untranslated}=(); 1.485 + #It will maintain the list of the translatable attributes 1.486 + $self->{attributes}=(); 1.487 + #It will maintain the list of the breaking tags 1.488 + $self->{break}=(); 1.489 + #It will maintain the list of the inline tags 1.490 + $self->{inline}=(); 1.491 + #It will maintain the list of the placeholder tags 1.492 + $self->{placeholder}=(); 1.493 + #list of the tags that must not be set in the tags or inline category 1.494 + #by this module or sub-module (unless specified in an option) 1.495 + $self->{nodefault}=(); 1.496 + 1.497 + $self->treat_options; 1.498 +} 1.499 + 1.500 +=head1 WRITING DERIVATE MODULES 1.501 + 1.502 +=head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE 1.503 + 1.504 +The simplest customization is to define which tags and attributes you want 1.505 +the parser to translate. This should be done in the initialize function. 1.506 +First you should call the main initialize, to get the command-line options, 1.507 +and then, append your custom definitions to the options hash. If you want 1.508 +to treat some new options from command line, you should define them before 1.509 +calling the main initialize: 1.510 + 1.511 + $self->{options}{'new_option'}=''; 1.512 + $self->SUPER::initialize(%options); 1.513 + $self->{options}{'_default_translated'}.=' <p> <head><title>'; 1.514 + $self->{options}{'attributes'}.=' <p>lang id'; 1.515 + $self->{options}{'_default_inline'}.=' <br>'; 1.516 + $self->treat_options; 1.517 + 1.518 +You should use the B<_default_inline>, B<_default_break>, 1.519 +B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>, 1.520 +and B<_default_attributes> options in derivated modules. This allow users 1.521 +to override the default behavior defined in your module with command line 1.522 +options. 1.523 + 1.524 +=head2 OVERRIDING THE found_string FUNCTION 1.525 + 1.526 +Another simple step is to override the function "found_string", which 1.527 +receives the extracted strings from the parser, in order to translate them. 1.528 +There you can control which strings you want to translate, and perform 1.529 +transformations to them before or after the translation itself. 1.530 + 1.531 +It receives the extracted text, the reference on where it was, and a hash 1.532 +that contains extra information to control what strings to translate, how 1.533 +to translate them and to generate the comment. 1.534 + 1.535 +The content of these options depends on the kind of string it is (specified in an 1.536 +entry of this hash): 1.537 + 1.538 +=over 1.539 + 1.540 +=item type="tag" 1.541 + 1.542 +The found string is the content of a translatable tag. The entry "tag_options" 1.543 +contains the option characters in front of the tag hierarchy in the module 1.544 +"tags" option. 1.545 + 1.546 +=item type="attribute" 1.547 + 1.548 +Means that the found string is the value of a translatable attribute. The 1.549 +entry "attribute" has the name of the attribute. 1.550 + 1.551 +=back 1.552 + 1.553 +It must return the text that will replace the original in the translated 1.554 +document. Here's a basic example of this function: 1.555 + 1.556 + sub found_string { 1.557 + my ($self,$text,$ref,$options)=@_; 1.558 + $text = $self->translate($text,$ref,"type ".$options->{'type'}, 1.559 + 'wrap'=>$self->{options}{'wrap'}); 1.560 + return $text; 1.561 + } 1.562 + 1.563 +There's another simple example in the new Dia module, which only filters 1.564 +some strings. 1.565 + 1.566 +=cut 1.567 + 1.568 +sub found_string { 1.569 + my ($self,$text,$ref,$options)=@_; 1.570 + 1.571 + if ($text =~ m/^\s*$/s) { 1.572 + return $text; 1.573 + } 1.574 + 1.575 + my $comment; 1.576 + my $wrap = $self->{options}{'wrap'}; 1.577 + 1.578 + if ($options->{'type'} eq "tag") { 1.579 + $comment = "Content of: ".$self->get_path; 1.580 + 1.581 + if($options->{'tag_options'} =~ /w/) { 1.582 + $wrap = 1; 1.583 + } 1.584 + if($options->{'tag_options'} =~ /W/) { 1.585 + $wrap = 0; 1.586 + } 1.587 + } elsif ($options->{'type'} eq "attribute") { 1.588 + $comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path; 1.589 + } elsif ($options->{'type'} eq "CDATA") { 1.590 + $comment = "CDATA"; 1.591 + $wrap = 0; 1.592 + } else { 1.593 + die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'}); 1.594 + } 1.595 + $text = $self->translate($text,$ref,$comment,'wrap'=>$wrap, comment => $options->{'comments'}); 1.596 + return $text; 1.597 +} 1.598 + 1.599 +=head2 MODIFYING TAG TYPES (TODO) 1.600 + 1.601 +This is a more complex one, but it enables a (almost) total customization. 1.602 +It's based in a list of hashes, each one defining a tag type's behavior. The 1.603 +list should be sorted so that the most general tags are after the most 1.604 +concrete ones (sorted first by the beginning and then by the end keys). To 1.605 +define a tag type you'll have to make a hash with the following keys: 1.606 + 1.607 +=over 4 1.608 + 1.609 +=item beginning 1.610 + 1.611 +Specifies the beginning of the tag, after the "E<lt>". 1.612 + 1.613 +=item end 1.614 + 1.615 +Specifies the end of the tag, before the "E<gt>". 1.616 + 1.617 +=item breaking 1.618 + 1.619 +It says if this is a breaking tag class. A non-breaking (inline) tag is one 1.620 +that can be taken as part of the content of another tag. It can take the 1.621 +values false (0), true (1) or undefined. If you leave this undefined, you'll 1.622 +have to define the f_breaking function that will say whether a concrete tag of 1.623 +this class is a breaking tag or not. 1.624 + 1.625 +=item f_breaking 1.626 + 1.627 +It's a function that will tell if the next tag is a breaking one or not. It 1.628 +should be defined if the "breaking" option is not. 1.629 + 1.630 +=item f_extract 1.631 + 1.632 +If you leave this key undefined, the generic extraction function will have to 1.633 +extract the tag itself. It's useful for tags that can have other tags or 1.634 +special structures in them, so that the main parser doesn't get mad. This 1.635 +function receives a boolean that says if the tag should be removed from the 1.636 +input stream or not. 1.637 + 1.638 +=item f_translate 1.639 + 1.640 +This function receives the tag (in the get_string_until() format) and returns 1.641 +the translated tag (translated attributes or all needed transformations) as a 1.642 +single string. 1.643 + 1.644 +=back 1.645 + 1.646 +=cut 1.647 + 1.648 +##### Generic XML tag types #####' 1.649 + 1.650 +our @tag_types = ( 1.651 + { beginning => "!--#", 1.652 + end => "--", 1.653 + breaking => 0, 1.654 + f_extract => \&tag_extract_comment, 1.655 + f_translate => \&tag_trans_comment}, 1.656 + { beginning => "!--", 1.657 + end => "--", 1.658 + breaking => 0, 1.659 + f_extract => \&tag_extract_comment, 1.660 + f_translate => \&tag_trans_comment}, 1.661 + { beginning => "?xml", 1.662 + end => "?", 1.663 + breaking => 1, 1.664 + f_translate => \&tag_trans_xmlhead}, 1.665 + { beginning => "?", 1.666 + end => "?", 1.667 + breaking => 1, 1.668 + f_translate => \&tag_trans_procins}, 1.669 + { beginning => "!DOCTYPE", 1.670 + end => "", 1.671 + breaking => 1, 1.672 + f_extract => \&tag_extract_doctype, 1.673 + f_translate => \&tag_trans_doctype}, 1.674 + { beginning => "![CDATA[", 1.675 + end => "", 1.676 + breaking => 1, 1.677 + f_extract => \&CDATA_extract, 1.678 + f_translate => \&CDATA_trans}, 1.679 + { beginning => "/", 1.680 + end => "", 1.681 + f_breaking => \&tag_break_close, 1.682 + f_translate => \&tag_trans_close}, 1.683 + { beginning => "", 1.684 + end => "/", 1.685 + f_breaking => \&tag_break_alone, 1.686 + f_translate => \&tag_trans_alone}, 1.687 + { beginning => "", 1.688 + end => "", 1.689 + f_breaking => \&tag_break_open, 1.690 + f_translate => \&tag_trans_open} 1.691 +); 1.692 + 1.693 +sub tag_extract_comment { 1.694 + my ($self,$remove)=(shift,shift); 1.695 + my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove}); 1.696 + return ($eof,@tag); 1.697 +} 1.698 + 1.699 +sub tag_trans_comment { 1.700 + my ($self,@tag)=@_; 1.701 + return $self->join_lines(@tag); 1.702 +} 1.703 + 1.704 +sub tag_trans_xmlhead { 1.705 + my ($self,@tag)=@_; 1.706 + 1.707 + # We don't have to translate anything from here: throw away references 1.708 + my $tag = $self->join_lines(@tag); 1.709 + $tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s; 1.710 + my $in_charset=$3; 1.711 + $self->detected_charset($in_charset); 1.712 + my $out_charset=$self->get_out_charset; 1.713 + 1.714 + if (defined $in_charset) { 1.715 + $tag =~ s/$in_charset/$out_charset/; 1.716 + } else { 1.717 + if ($tag =~ m/standalone/) { 1.718 + $tag =~ s/(standalone)/encoding="$out_charset" $1/; 1.719 + } else { 1.720 + $tag.= " encoding=\"$out_charset\""; 1.721 + } 1.722 + } 1.723 + 1.724 + return $tag; 1.725 +} 1.726 + 1.727 +sub tag_trans_procins { 1.728 + my ($self,@tag)=@_; 1.729 + return $self->join_lines(@tag); 1.730 +} 1.731 + 1.732 +sub tag_extract_doctype { 1.733 + my ($self,$remove)=(shift,shift); 1.734 + 1.735 + # Check if there is an internal subset (between []). 1.736 + my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1}); 1.737 + my $parity = 0; 1.738 + my $paragraph = ""; 1.739 + map { $parity = 1 - $parity; $paragraph.= $parity?$_:""; } @tag; 1.740 + my $found = 0; 1.741 + if ($paragraph =~ m/<.*\[.*</s) { 1.742 + $found = 1 1.743 + } 1.744 + 1.745 + if (not $found) { 1.746 + ($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1}); 1.747 + } else { 1.748 + ($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1}); 1.749 + } 1.750 + return ($eof,@tag); 1.751 +} 1.752 + 1.753 +sub tag_trans_doctype { 1.754 +# This check is not really reliable. There are system and public 1.755 +# identifiers. Only the public one could be checked reliably. 1.756 + my ($self,@tag)=@_; 1.757 + if (defined $self->{options}{'doctype'} ) { 1.758 + my $doctype = $self->{options}{'doctype'}; 1.759 + if ( $tag[0] !~ /\Q$doctype\E/i ) { 1.760 + warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype); 1.761 + } 1.762 + } 1.763 + my $i = 0; 1.764 + my $basedir = $tag[1]; 1.765 + $basedir =~ s/:[0-9]+$//; 1.766 + $basedir = dirname($basedir); 1.767 + 1.768 + while ( $i < $#tag ) { 1.769 + my $t = $tag[$i]; 1.770 + my $ref = $tag[$i+1]; 1.771 + if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) { 1.772 + my $part1 = $1; 1.773 + my $part2 = $2; 1.774 + my $includenow = 0; 1.775 + my $file = 0; 1.776 + my $name = ""; 1.777 + if ($part2 =~ /^(%\s+)(.*)$/s ) { 1.778 + $part1.= $1; 1.779 + $part2 = $2; 1.780 + $includenow = 1; 1.781 + } 1.782 + $part2 =~ /^(\S+)(\s+)(.*)$/s; 1.783 + $name = $1; 1.784 + $part1.= $1.$2; 1.785 + $part2 = $3; 1.786 + if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) { 1.787 + $part1.= $1; 1.788 + $part2 = $2; 1.789 + $file = 1; 1.790 + if ($self->{options}{'includeexternal'}) { 1.791 + $entities{$name} = $part2; 1.792 + $entities{$name} =~ s/^"?(.*?)".*$/$1/s; 1.793 + $entities{$name} = File::Spec->catfile($basedir, $entities{$name}); 1.794 + } 1.795 + } 1.796 + if ((not $file) and (not $includenow)) { 1.797 + if ($part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s) { 1.798 + my $comment = "Content of the $name entity"; 1.799 + my $quote = $1; 1.800 + my $text = $2; 1.801 + $part2 = $3; 1.802 + $text = $self->translate($text, 1.803 + $ref, 1.804 + $comment, 1.805 + 'wrap'=>1); 1.806 + $t = $part1."$quote$text$quote$part2"; 1.807 + } 1.808 + } 1.809 +# print $part1."\n"; 1.810 +# print $name."\n"; 1.811 +# print $part2."\n"; 1.812 + } 1.813 + $tag[$i] = $t; 1.814 + $i += 2; 1.815 + } 1.816 + return $self->join_lines(@tag); 1.817 +} 1.818 + 1.819 +sub tag_break_close { 1.820 + my ($self,@tag)=@_; 1.821 + my $struct = $self->get_path; 1.822 + my $options = $self->get_translate_options($struct); 1.823 + if ($options =~ m/[ip]/) { 1.824 + return 0; 1.825 + } else { 1.826 + return 1; 1.827 + } 1.828 +} 1.829 + 1.830 +sub tag_trans_close { 1.831 + my ($self,@tag)=@_; 1.832 + my $name = $self->get_tag_name(@tag); 1.833 + 1.834 + my $test = pop @path; 1.835 + if (!defined($test) || $test ne $name ) { 1.836 + my $ontagerror = $self->{options}{'ontagerror'}; 1.837 + if ($ontagerror eq "warn") { 1.838 + warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name); 1.839 + } elsif ($ontagerror ne "silent") { 1.840 + die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name); 1.841 + } 1.842 + } 1.843 + return $self->join_lines(@tag); 1.844 +} 1.845 + 1.846 +sub CDATA_extract { 1.847 + my ($self,$remove)=(shift,shift); 1.848 + my ($eof, @tag) = $self->get_string_until(']]>',{include=>1,unquoted=>0,remove=>$remove}); 1.849 + 1.850 + return ($eof, @tag); 1.851 +} 1.852 + 1.853 +sub CDATA_trans { 1.854 + my ($self,@tag)=@_; 1.855 + return $self->found_string($self->join_lines(@tag), 1.856 + $tag[1], 1.857 + {'type' => "CDATA"}); 1.858 +} 1.859 + 1.860 +sub tag_break_alone { 1.861 + my ($self,@tag)=@_; 1.862 + my $struct = $self->get_path($self->get_tag_name(@tag)); 1.863 + if ($self->get_translate_options($struct) =~ m/i/) { 1.864 + return 0; 1.865 + } else { 1.866 + return 1; 1.867 + } 1.868 +} 1.869 + 1.870 +sub tag_trans_alone { 1.871 + my ($self,@tag)=@_; 1.872 + my $name = $self->get_tag_name(@tag); 1.873 + push @path, $name; 1.874 + 1.875 + $name = $self->treat_attributes(@tag); 1.876 + 1.877 + pop @path; 1.878 + return $name; 1.879 +} 1.880 + 1.881 +sub tag_break_open { 1.882 + my ($self,@tag)=@_; 1.883 + my $struct = $self->get_path($self->get_tag_name(@tag)); 1.884 + my $options = $self->get_translate_options($struct); 1.885 + if ($options =~ m/[ip]/) { 1.886 + return 0; 1.887 + } else { 1.888 + return 1; 1.889 + } 1.890 +} 1.891 + 1.892 +sub tag_trans_open { 1.893 + my ($self,@tag)=@_; 1.894 + my $name = $self->get_tag_name(@tag); 1.895 + push @path, $name; 1.896 + 1.897 + $name = $self->treat_attributes(@tag); 1.898 + 1.899 + return $name; 1.900 +} 1.901 + 1.902 +##### END of Generic XML tag types ##### 1.903 + 1.904 +=head1 INTERNAL FUNCTIONS used to write derivated parsers 1.905 + 1.906 +=head2 WORKING WITH TAGS 1.907 + 1.908 +=over 4 1.909 + 1.910 +=item get_path() 1.911 + 1.912 +This function returns the path to the current tag from the document's root, 1.913 +in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>. 1.914 + 1.915 +An additional array of tags (without brackets) can be passed in argument. 1.916 +These path elements are added to the end of the current path. 1.917 + 1.918 +=cut 1.919 + 1.920 +sub get_path { 1.921 + my $self = shift; 1.922 + my @add = @_; 1.923 + if ( @path > 0 or @add > 0 ) { 1.924 + return "<".join("><",@path,@add).">"; 1.925 + } else { 1.926 + return "outside any tag (error?)"; 1.927 + } 1.928 +} 1.929 + 1.930 +=item tag_type() 1.931 + 1.932 +This function returns the index from the tag_types list that fits to the next 1.933 +tag in the input stream, or -1 if it's at the end of the input file. 1.934 + 1.935 +=cut 1.936 + 1.937 +sub tag_type { 1.938 + my $self = shift; 1.939 + my ($line,$ref) = $self->shiftline(); 1.940 + my ($match1,$match2); 1.941 + my $found = 0; 1.942 + my $i = 0; 1.943 + 1.944 + if (!defined($line)) { return -1; } 1.945 + 1.946 + $self->unshiftline($line,$ref); 1.947 + my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1}); 1.948 + my $line2 = $self->join_lines(@lines); 1.949 + while (!$found && $i < @tag_types) { 1.950 + ($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end}); 1.951 + if ($line =~ /^<\Q$match1\E/) { 1.952 + if (!defined($tag_types[$i]->{f_extract})) { 1.953 +#print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n"; 1.954 + if (defined($line2) and $line2 =~ /\Q$match2\E>$/) { 1.955 + $found = 1; 1.956 +#print "YES: <".$match1." ".$match2.">\n"; 1.957 + } else { 1.958 +#print "NO: <".$match1." ".$match2.">\n"; 1.959 + $i++; 1.960 + } 1.961 + } else { 1.962 + $found = 1; 1.963 + } 1.964 + } else { 1.965 + $i++; 1.966 + } 1.967 + } 1.968 + if (!$found) { 1.969 + #It should never enter here, unless you undefine the most 1.970 + #general tags (as <...>) 1.971 + die "po4a::xml: Unknown tag type: ".$line."\n"; 1.972 + } else { 1.973 + return $i; 1.974 + } 1.975 +} 1.976 + 1.977 +=item extract_tag($$) 1.978 + 1.979 +This function returns the next tag from the input stream without the beginning 1.980 +and end, in an array form, to maintain the references from the input file. It 1.981 +has two parameters: the type of the tag (as returned by tag_type) and a 1.982 +boolean, that indicates if it should be removed from the input stream. 1.983 + 1.984 +=cut 1.985 + 1.986 +sub extract_tag { 1.987 + my ($self,$type,$remove) = (shift,shift,shift); 1.988 + my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end}); 1.989 + my ($eof,@tag); 1.990 + if (defined($tag_types[$type]->{f_extract})) { 1.991 + ($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove); 1.992 + } else { 1.993 + ($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1}); 1.994 + } 1.995 + $tag[0] =~ /^<\Q$match1\E(.*)$/s; 1.996 + $tag[0] = $1; 1.997 + $tag[$#tag-1] =~ /^(.*)\Q$match2\E>$/s; 1.998 + $tag[$#tag-1] = $1; 1.999 + return ($eof,@tag); 1.1000 +} 1.1001 + 1.1002 +=item get_tag_name(@) 1.1003 + 1.1004 +This function returns the name of the tag passed as an argument, in the array 1.1005 +form returned by extract_tag. 1.1006 + 1.1007 +=cut 1.1008 + 1.1009 +sub get_tag_name { 1.1010 + my ($self,@tag)=@_; 1.1011 + $tag[0] =~ /^(\S*)/; 1.1012 + return $1; 1.1013 +} 1.1014 + 1.1015 +=item breaking_tag() 1.1016 + 1.1017 +This function returns a boolean that says if the next tag in the input stream 1.1018 +is a breaking tag or not (inline tag). It leaves the input stream intact. 1.1019 + 1.1020 +=cut 1.1021 + 1.1022 +sub breaking_tag { 1.1023 + my $self = shift; 1.1024 + my $break; 1.1025 + 1.1026 + my $type = $self->tag_type; 1.1027 + if ($type == -1) { return 0; } 1.1028 + 1.1029 +#print "TAG TYPE = ".$type."\n"; 1.1030 + $break = $tag_types[$type]->{breaking}; 1.1031 + if (!defined($break)) { 1.1032 + # This tag's breaking depends on its content 1.1033 + my ($eof,@lines) = $self->extract_tag($type,0); 1.1034 + $break = &{$tag_types[$type]->{f_breaking}}($self,@lines); 1.1035 + } 1.1036 +#print "break = ".$break."\n"; 1.1037 + return $break; 1.1038 +} 1.1039 + 1.1040 +=item treat_tag() 1.1041 + 1.1042 +This function translates the next tag from the input stream. Using each 1.1043 +tag type's custom translation functions. 1.1044 + 1.1045 +=cut 1.1046 + 1.1047 +sub treat_tag { 1.1048 + my $self = shift; 1.1049 + my $type = $self->tag_type; 1.1050 + 1.1051 + my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end}); 1.1052 + my ($eof,@lines) = $self->extract_tag($type,1); 1.1053 + 1.1054 + $lines[0] =~ /^(\s*)(.*)$/s; 1.1055 + my $space1 = $1; 1.1056 + $lines[0] = $2; 1.1057 + $lines[$#lines-1] =~ /^(.*?)(\s*)$/s; 1.1058 + my $space2 = $2; 1.1059 + $lines[$#lines-1] = $1; 1.1060 + 1.1061 + # Calling this tag type's specific handling (translation of 1.1062 + # attributes...) 1.1063 + my $line = &{$tag_types[$type]->{f_translate}}($self,@lines); 1.1064 + $self->pushline("<".$match1.$space1.$line.$space2.$match2.">"); 1.1065 + return $eof; 1.1066 +} 1.1067 + 1.1068 +=item tag_in_list($@) 1.1069 + 1.1070 +This function returns a string value that says if the first argument (a tag 1.1071 +hierarchy) matches any of the tags from the second argument (a list of tags 1.1072 +or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the 1.1073 +matched tag's options (the characters in front of the tag) or 1 (if that tag 1.1074 +doesn't have options). 1.1075 + 1.1076 +=back 1.1077 + 1.1078 +=cut 1.1079 +sub tag_in_list ($$$) { 1.1080 + my ($self,$path,$list) = @_; 1.1081 + if ($self->{options}{'caseinsensitive'}) { 1.1082 + $path = lc $path; 1.1083 + } 1.1084 + 1.1085 + while (1) { 1.1086 + if (defined $list->{$path}) { 1.1087 + if (length $list->{$path}) { 1.1088 + return $list->{$path}; 1.1089 + } else { 1.1090 + return 1; 1.1091 + } 1.1092 + } 1.1093 + last unless ($path =~ m/</); 1.1094 + $path =~ s/^<.*?>//; 1.1095 + } 1.1096 + 1.1097 + return 0; 1.1098 +} 1.1099 + 1.1100 +=head2 WORKING WITH ATTRIBUTES 1.1101 + 1.1102 +=over 4 1.1103 + 1.1104 +=item treat_attributes(@) 1.1105 + 1.1106 +This function handles the translation of the tags' attributes. It receives the tag 1.1107 +without the beginning / end marks, and then it finds the attributes, and it 1.1108 +translates the translatable ones (specified by the module option "attributes"). 1.1109 +This returns a plain string with the translated tag. 1.1110 + 1.1111 +=back 1.1112 + 1.1113 +=cut 1.1114 + 1.1115 +sub treat_attributes { 1.1116 + my ($self,@tag)=@_; 1.1117 + 1.1118 + $tag[0] =~ /^(\S*)(.*)/s; 1.1119 + my $text = $1; 1.1120 + $tag[0] = $2; 1.1121 + 1.1122 + while (@tag) { 1.1123 + my $complete = 1; 1.1124 + 1.1125 + $text .= $self->skip_spaces(\@tag); 1.1126 + if (@tag) { 1.1127 + # Get the attribute's name 1.1128 + $complete = 0; 1.1129 + 1.1130 + $tag[0] =~ /^([^\s=]+)(.*)/s; 1.1131 + my $name = $1; 1.1132 + my $ref = $tag[1]; 1.1133 + $tag[0] = $2; 1.1134 + $text .= $name; 1.1135 + $text .= $self->skip_spaces(\@tag); 1.1136 + if (@tag) { 1.1137 + # Get the '=' 1.1138 + if ($tag[0] =~ /^=(.*)/s) { 1.1139 + $tag[0] = $1; 1.1140 + $text .= "="; 1.1141 + $text .= $self->skip_spaces(\@tag); 1.1142 + if (@tag) { 1.1143 + # Get the value 1.1144 + my $value=""; 1.1145 + $ref=$tag[1]; 1.1146 + my $quot=substr($tag[0],0,1); 1.1147 + if ($quot ne "\"" and $quot ne "'") { 1.1148 + # Unquoted value 1.1149 + $quot=""; 1.1150 + $tag[0] =~ /^(\S+)(.*)/s; 1.1151 + $value = $1; 1.1152 + $tag[0] = $2; 1.1153 + } else { 1.1154 + # Quoted value 1.1155 + $text .= $quot; 1.1156 + $tag[0] =~ /^\Q$quot\E(.*)/s; 1.1157 + $tag[0] = $1; 1.1158 + while ($tag[0] !~ /\Q$quot\E/) { 1.1159 + $value .= $tag[0]; 1.1160 + shift @tag; 1.1161 + shift @tag; 1.1162 + } 1.1163 + $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s; 1.1164 + $value .= $1; 1.1165 + $tag[0] = $2; 1.1166 + } 1.1167 + $complete = 1; 1.1168 + if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) { 1.1169 + $text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name }); 1.1170 + } else { 1.1171 + print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value) 1.1172 + if $self->debug(); 1.1173 + $text .= $self->recode_skipped_text($value); 1.1174 + } 1.1175 + $text .= $quot; 1.1176 + } 1.1177 + } 1.1178 + } 1.1179 + 1.1180 + unless ($complete) { 1.1181 + my $ontagerror = $self->{options}{'ontagerror'}; 1.1182 + if ($ontagerror eq "warn") { 1.1183 + warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax. Continuing...")); 1.1184 + } elsif ($ontagerror ne "silent") { 1.1185 + die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax")); 1.1186 + } 1.1187 + } 1.1188 + } 1.1189 + } 1.1190 + return $text; 1.1191 +} 1.1192 + 1.1193 +# Returns an empty string if the content in the $path should not be 1.1194 +# translated. 1.1195 +# 1.1196 +# Otherwise, returns the set of options for translation: 1.1197 +# w: the content shall be re-wrapped 1.1198 +# W: the content shall not be re-wrapped 1.1199 +# i: the tag shall be inlined 1.1200 +# p: a placeholder shall replace the tag (and its content) 1.1201 +# 1.1202 +# A translatable inline tag in an untranslated tag is treated as a translatable breaking tag. 1.1203 +my %translate_options_cache; 1.1204 +sub get_translate_options { 1.1205 + my $self = shift; 1.1206 + my $path = shift; 1.1207 + 1.1208 + if (defined $translate_options_cache{$path}) { 1.1209 + return $translate_options_cache{$path}; 1.1210 + } 1.1211 + 1.1212 + my $options = ""; 1.1213 + my $translate = 0; 1.1214 + my $usedefault = 1; 1.1215 + 1.1216 + my $inlist = 0; 1.1217 + my $tag = $self->get_tag_from_list($path, $self->{tags}); 1.1218 + if (defined $tag) { 1.1219 + $inlist = 1; 1.1220 + } 1.1221 + if ($self->{options}{'tagsonly'} eq $inlist) { 1.1222 + $usedefault = 0; 1.1223 + if (defined $tag) { 1.1224 + $options = $tag; 1.1225 + $options =~ s/<.*$//; 1.1226 + } else { 1.1227 + if ($self->{options}{'wrap'}) { 1.1228 + $options = "w"; 1.1229 + } else { 1.1230 + $options = "W"; 1.1231 + } 1.1232 + } 1.1233 + $translate = 1; 1.1234 + } 1.1235 + 1.1236 +# TODO: a less precise set of tags should not override a more precise one 1.1237 + # The tags and tagsonly options are deprecated. 1.1238 + # The translated and untranslated options have an higher priority. 1.1239 + $tag = $self->get_tag_from_list($path, $self->{translated}); 1.1240 + if (defined $tag) { 1.1241 + $usedefault = 0; 1.1242 + $options = $tag; 1.1243 + $options =~ s/<.*$//; 1.1244 + $translate = 1; 1.1245 + } 1.1246 + 1.1247 + if ($translate and $options !~ m/w/i) { 1.1248 + $options .= ($self->{options}{'wrap'})?"w":"W"; 1.1249 + } 1.1250 + 1.1251 + if (not defined $tag) { 1.1252 + $tag = $self->get_tag_from_list($path, $self->{untranslated}); 1.1253 + if (defined $tag) { 1.1254 + $usedefault = 0; 1.1255 + $options = ""; 1.1256 + $translate = 0; 1.1257 + } 1.1258 + } 1.1259 + 1.1260 + $tag = $self->get_tag_from_list($path, $self->{inline}); 1.1261 + if (defined $tag) { 1.1262 + $usedefault = 0; 1.1263 + $options .= "i"; 1.1264 + } else { 1.1265 + $tag = $self->get_tag_from_list($path, $self->{placeholder}); 1.1266 + if (defined $tag) { 1.1267 + $usedefault = 0; 1.1268 + $options .= "p"; 1.1269 + } 1.1270 + } 1.1271 + 1.1272 + if ($usedefault) { 1.1273 + $options = $self->{options}{'defaulttranslateoption'}; 1.1274 + } 1.1275 + 1.1276 + # A translatable inline tag in an untranslated tag is treated as a 1.1277 + # translatable breaking tag. 1.1278 + if ($options =~ m/i/) { 1.1279 + my $ppath = $path; 1.1280 + $ppath =~ s/<[^>]*>$//; 1.1281 + my $poptions = $self->get_translate_options ($ppath); 1.1282 + if ($poptions eq "") { 1.1283 + $options =~ s/i//; 1.1284 + } 1.1285 + } 1.1286 + 1.1287 + if ($options =~ m/i/ and $self->{options}{'foldattributes'}) { 1.1288 + $options .= "f"; 1.1289 + } 1.1290 + 1.1291 + $translate_options_cache{$path} = $options; 1.1292 + return $options; 1.1293 +} 1.1294 + 1.1295 + 1.1296 +# Return the tag (or biggest set of tags) of a list which matches with the 1.1297 +# given path. 1.1298 +# 1.1299 +# The tag (or set of tags) is returned with its options. 1.1300 +# 1.1301 +# If no tags could match the path, undef is returned. 1.1302 +sub get_tag_from_list ($$$) { 1.1303 + my ($self,$path,$list) = @_; 1.1304 + if ($self->{options}{'caseinsensitive'}) { 1.1305 + $path = lc $path; 1.1306 + } 1.1307 + 1.1308 + while (1) { 1.1309 + if (defined $list->{$path}) { 1.1310 + return $list->{$path}.$path; 1.1311 + } 1.1312 + last unless ($path =~ m/</); 1.1313 + $path =~ s/^<.*?>//; 1.1314 + } 1.1315 + 1.1316 + return undef; 1.1317 +} 1.1318 + 1.1319 + 1.1320 + 1.1321 +sub treat_content { 1.1322 + my $self = shift; 1.1323 + my $blank=""; 1.1324 + # Indicates if the paragraph will have to be translated 1.1325 + my $translate = ""; 1.1326 + 1.1327 + my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1}); 1.1328 + 1.1329 + while (!$eof and !$self->breaking_tag) { 1.1330 + NEXT_TAG: 1.1331 + my @text; 1.1332 + my $type = $self->tag_type; 1.1333 + my $f_extract = $tag_types[$type]->{'f_extract'}; 1.1334 + if ( defined($f_extract) 1.1335 + and $f_extract eq \&tag_extract_comment) { 1.1336 + # Remove the content of the comments 1.1337 + ($eof, @text) = $self->extract_tag($type,1); 1.1338 + $text[$#text-1] .= "\0"; 1.1339 + if ($tag_types[$type]->{'beginning'} eq "!--#") { 1.1340 + $text[0] = "#".$text[0]; 1.1341 + } 1.1342 + push @comments, @text; 1.1343 + } else { 1.1344 + my ($tmpeof, @tag) = $self->extract_tag($type,0); 1.1345 + # Append the found inline tag 1.1346 + ($eof,@text)=$self->get_string_until('>', 1.1347 + {include=>1, 1.1348 + remove=>1, 1.1349 + unquoted=>1}); 1.1350 + # Append or remove the opening/closing tag from 1.1351 + # the tag path 1.1352 + if ($tag_types[$type]->{'end'} eq "") { 1.1353 + if ($tag_types[$type]->{'beginning'} eq "") { 1.1354 + # Opening inline tag 1.1355 + my $cur_tag_name = $self->get_tag_name(@tag); 1.1356 + my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name)); 1.1357 + if ($t_opts =~ m/p/) { 1.1358 + # We enter a new holder. 1.1359 + # Append a <placeholder ...> tag to the current 1.1360 + # paragraph, and save the @paragraph in the 1.1361 + # current holder. 1.1362 + my $last_holder = $save_holders[$#save_holders]; 1.1363 + my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>"; 1.1364 + push @paragraph, ($placeholder_str, $text[1]); 1.1365 + my @saved_paragraph = @paragraph; 1.1366 + 1.1367 + $last_holder->{'paragraph'} = \@saved_paragraph; 1.1368 + 1.1369 + # Then we must push a new holder 1.1370 + my @new_paragraph = (); 1.1371 + my @sub_translations = (); 1.1372 + my %folded_attributes; 1.1373 + my %new_holder = ('paragraph' => \@new_paragraph, 1.1374 + 'open' => $text[0], 1.1375 + 'translation' => "", 1.1376 + 'close' => undef, 1.1377 + 'sub_translations' => \@sub_translations, 1.1378 + 'folded_attributes' => \%folded_attributes); 1.1379 + push @save_holders, \%new_holder; 1.1380 + @text = (); 1.1381 + 1.1382 + # The current @paragraph 1.1383 + # (for the current holder) 1.1384 + # is empty. 1.1385 + @paragraph = (); 1.1386 + } elsif ($t_opts =~ m/f/) { 1.1387 + my $tag_full = $self->join_lines(@text); 1.1388 + my $tag_ref = $text[1]; 1.1389 + if ($tag_full =~ m/^<\s*\S+\s+\S.*>$/s) { 1.1390 + my $holder = $save_holders[$#save_holders]; 1.1391 + my $id = 0; 1.1392 + foreach (keys %{$holder->{folded_attributes}}) { 1.1393 + $id = $_ + 1 if ($_ >= $id); 1.1394 + } 1.1395 + $holder->{folded_attributes}->{$id} = $tag_full; 1.1396 + 1.1397 + @text = ("<$cur_tag_name po4a-id=$id>", $tag_ref); 1.1398 + } 1.1399 + } 1.1400 + push @path, $cur_tag_name; 1.1401 + } elsif ($tag_types[$type]->{'beginning'} eq "/") { 1.1402 + # Closing inline tag 1.1403 + 1.1404 + # Check if this is closing the 1.1405 + # last opening tag we detected. 1.1406 + my $test = pop @path; 1.1407 + my $name = $self->get_tag_name(@tag); 1.1408 + if (!defined($test) || 1.1409 + $test ne $name ) { 1.1410 + my $ontagerror = $self->{options}{'ontagerror'}; 1.1411 + if ($ontagerror eq "warn") { 1.1412 + warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name); 1.1413 + } elsif ($ontagerror ne "silent") { 1.1414 + die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name); 1.1415 + } 1.1416 + } 1.1417 + 1.1418 + if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) { 1.1419 + # This closes the current holder. 1.1420 + 1.1421 + push @path, $self->get_tag_name(@tag); 1.1422 + # Now translate this paragraph if needed. 1.1423 + # This will call pushline and append the 1.1424 + # translation to the current holder's translation. 1.1425 + $self->translate_paragraph(@paragraph); 1.1426 + pop @path; 1.1427 + 1.1428 + # Now that this holder is closed, we can remove 1.1429 + # the holder from the stack. 1.1430 + my $holder = pop @save_holders; 1.1431 + # We need to keep the translation of this holder 1.1432 + my $translation = $holder->{'open'}.$holder->{'translation'}.$text[0]; 1.1433 + # FIXME: @text could be multilines. 1.1434 + 1.1435 + @text = (); 1.1436 + 1.1437 + # Then we store the translation in the previous 1.1438 + # holder's sub_translations array 1.1439 + my $previous_holder = $save_holders[$#save_holders]; 1.1440 + push @{$previous_holder->{'sub_translations'}}, $translation; 1.1441 + # We also need to restore the @paragraph array, as 1.1442 + # it was before we encountered the holder. 1.1443 + @paragraph = @{$previous_holder->{'paragraph'}}; 1.1444 + } 1.1445 + } 1.1446 + } 1.1447 + push @paragraph, @text; 1.1448 + } 1.1449 + 1.1450 + # Next tag 1.1451 + ($eof,@text)=$self->get_string_until('<',{remove=>1}); 1.1452 + if ($#text > 0) { 1.1453 + # Check if text (extracted after the inline tag) 1.1454 + # has to be translated 1.1455 + push @paragraph, @text; 1.1456 + } 1.1457 + } 1.1458 + 1.1459 + # This strips the extracted strings 1.1460 + # (only if you don't specify the 'nostrip' option, and if the 1.1461 + # paragraph can be re-wrapped) 1.1462 + $translate = $self->get_translate_options($self->get_path); 1.1463 + if (!$self->{options}{'nostrip'} and $translate !~ m/W/) { 1.1464 + my $clean = 0; 1.1465 + # Clean the beginning 1.1466 + while (!$clean and $#paragraph > 0) { 1.1467 + $paragraph[0] =~ /^(\s*)(.*)/s; 1.1468 + my $match = $1; 1.1469 + if ($paragraph[0] eq $match) { 1.1470 + if ($match ne "") { 1.1471 + $self->pushline($match); 1.1472 + } 1.1473 + shift @paragraph; 1.1474 + shift @paragraph; 1.1475 + } else { 1.1476 + $paragraph[0] = $2; 1.1477 + if ($match ne "") { 1.1478 + $self->pushline($match); 1.1479 + } 1.1480 + $clean = 1; 1.1481 + } 1.1482 + } 1.1483 + $clean = 0; 1.1484 + # Clean the end 1.1485 + while (!$clean and $#paragraph > 0) { 1.1486 + $paragraph[$#paragraph-1] =~ /^(.*?)(\s*)$/s; 1.1487 + my $match = $2; 1.1488 + if ($paragraph[$#paragraph-1] eq $match) { 1.1489 + if ($match ne "") { 1.1490 + $blank = $match.$blank; 1.1491 + } 1.1492 + pop @paragraph; 1.1493 + pop @paragraph; 1.1494 + } else { 1.1495 + $paragraph[$#paragraph-1] = $1; 1.1496 + if ($match ne "") { 1.1497 + $blank = $match.$blank; 1.1498 + } 1.1499 + $clean = 1; 1.1500 + } 1.1501 + } 1.1502 + } 1.1503 + 1.1504 + # Translate the string when needed 1.1505 + # This will either push the translation in the translated document or 1.1506 + # in the current holder translation. 1.1507 + $self->translate_paragraph(@paragraph); 1.1508 + 1.1509 + # Push the trailing blanks 1.1510 + if ($blank ne "") { 1.1511 + $self->pushline($blank); 1.1512 + } 1.1513 + return $eof; 1.1514 +} 1.1515 + 1.1516 +# Translate a @paragraph array of (string, reference). 1.1517 +# The $translate argument indicates if the strings must be translated or 1.1518 +# just pushed 1.1519 +sub translate_paragraph { 1.1520 + my $self = shift; 1.1521 + my @paragraph = @_; 1.1522 + my $translate = $self->get_translate_options($self->get_path); 1.1523 + 1.1524 + while ( (scalar @paragraph) 1.1525 + and ($paragraph[0] =~ m/^\s*\n/s)) { 1.1526 + $self->pushline($paragraph[0]); 1.1527 + shift @paragraph; 1.1528 + shift @paragraph; 1.1529 + } 1.1530 + 1.1531 + my $comments; 1.1532 + while (@comments) { 1.1533 + my ($comment,$eoc); 1.1534 + do { 1.1535 + my ($t,$l) = (shift @comments, shift @comments); 1.1536 + $t =~ s/\n?(\0)?$//; 1.1537 + $eoc = $1; 1.1538 + $comment .= "\n" if defined $comment; 1.1539 + $comment .= $t; 1.1540 + } until ($eoc); 1.1541 + $comments .= "\n" if defined $comments; 1.1542 + $comments .= $comment; 1.1543 + $self->pushline("<!--".$comment."-->\n") if defined $comment; 1.1544 + } 1.1545 + @comments = (); 1.1546 + 1.1547 + if ($self->{options}{'cpp'}) { 1.1548 + my @tmp = @paragraph; 1.1549 + @paragraph = (); 1.1550 + while (@tmp) { 1.1551 + my ($t,$l) = (shift @tmp, shift @tmp); 1.1552 + # #include can be followed by a filename between 1.1553 + # <> brackets. In that case, the argument won't be 1.1554 + # handled in the same call to translate_paragraph. 1.1555 + # Thus do not try to match "include ". 1.1556 + if ($t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si) { 1.1557 + if (@paragraph) { 1.1558 + $self->translate_paragraph(@paragraph); 1.1559 + @paragraph = (); 1.1560 + $self->pushline("\n"); 1.1561 + } 1.1562 + $self->pushline($t); 1.1563 + } else { 1.1564 + push @paragraph, ($t,$l); 1.1565 + } 1.1566 + } 1.1567 + } 1.1568 + 1.1569 + my $para = $self->join_lines(@paragraph); 1.1570 + if ( length($para) > 0 ) { 1.1571 + if ($translate ne "") { 1.1572 + # This tag should be translated 1.1573 + $self->pushline($self->found_string( 1.1574 + $para, 1.1575 + $paragraph[1], { 1.1576 + type=>"tag", 1.1577 + tag_options=>$translate, 1.1578 + comments=>$comments 1.1579 + })); 1.1580 + } else { 1.1581 + # Inform that this tag isn't translated in debug mode 1.1582 + print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para) 1.1583 + if $self->debug(); 1.1584 + $self->pushline($self->recode_skipped_text($para)); 1.1585 + } 1.1586 + } 1.1587 + # Now the paragraph is fully translated. 1.1588 + # If we have all the holders' translation, we can replace the 1.1589 + # placeholders by their translations. 1.1590 + # We must wait to have all the translations because the holders are 1.1591 + # numbered. 1.1592 + { 1.1593 + my $holder = $save_holders[$#save_holders]; 1.1594 + my $translation = $holder->{'translation'}; 1.1595 + 1.1596 + # Count the number of <placeholder ...> in $translation 1.1597 + my $count = 0; 1.1598 + my $str = $translation; 1.1599 + while ( (defined $str) 1.1600 + and ($str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s)) { 1.1601 + $count += 1; 1.1602 + $str = $2; 1.1603 + if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) { 1.1604 + $count = -1; 1.1605 + last; 1.1606 + } 1.1607 + } 1.1608 + 1.1609 + if ( (defined $translation) 1.1610 + and (scalar(@{$holder->{'sub_translations'}}) == $count)) { 1.1611 + # OK, all the holders of the current paragraph are 1.1612 + # closed (and translated). 1.1613 + # Replace them by their translation. 1.1614 + while ($translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s) { 1.1615 + # FIXME: we could also check that 1.1616 + # * the holder exists 1.1617 + # * all the holders are used 1.1618 + $translation = $1.$holder->{'sub_translations'}->[$2].$3; 1.1619 + } 1.1620 + # We have our translation 1.1621 + $holder->{'translation'} = $translation; 1.1622 + # And there is no need for any holder in it. 1.1623 + my @sub_translations = (); 1.1624 + $holder->{'sub_translations'} = \@sub_translations; 1.1625 + } 1.1626 + } 1.1627 + 1.1628 +} 1.1629 + 1.1630 + 1.1631 + 1.1632 +=head2 WORKING WITH THE MODULE OPTIONS 1.1633 + 1.1634 +=over 4 1.1635 + 1.1636 +=item treat_options() 1.1637 + 1.1638 +This function fills the internal structures that contain the tags, attributes 1.1639 +and inline data with the options of the module (specified in the command-line 1.1640 +or in the initialize function). 1.1641 + 1.1642 +=back 1.1643 + 1.1644 +=cut 1.1645 + 1.1646 +sub treat_options { 1.1647 + my $self = shift; 1.1648 + 1.1649 + if ($self->{options}{'caseinsensitive'}) { 1.1650 + $self->{options}{'nodefault'} = lc $self->{options}{'nodefault'}; 1.1651 + $self->{options}{'tags'} = lc $self->{options}{'tags'}; 1.1652 + $self->{options}{'break'} = lc $self->{options}{'break'}; 1.1653 + $self->{options}{'_default_break'} = lc $self->{options}{'_default_break'}; 1.1654 + $self->{options}{'translated'} = lc $self->{options}{'translated'}; 1.1655 + $self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'}; 1.1656 + $self->{options}{'untranslated'} = lc $self->{options}{'untranslated'}; 1.1657 + $self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'}; 1.1658 + $self->{options}{'attributes'} = lc $self->{options}{'attributes'}; 1.1659 + $self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'}; 1.1660 + $self->{options}{'inline'} = lc $self->{options}{'inline'}; 1.1661 + $self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'}; 1.1662 + $self->{options}{'placeholder'} = lc $self->{options}{'placeholder'}; 1.1663 + $self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'}; 1.1664 + } 1.1665 + 1.1666 + $self->{options}{'nodefault'} =~ /^\s*(.*)\s*$/s; 1.1667 + my %list_nodefault; 1.1668 + foreach (split(/\s+/s,$1)) { 1.1669 + $list_nodefault{$_} = 1; 1.1670 + } 1.1671 + $self->{nodefault} = \%list_nodefault; 1.1672 + 1.1673 + $self->{options}{'tags'} =~ /^\s*(.*)\s*$/s; 1.1674 + if (length $self->{options}{'tags'}) { 1.1675 + warn wrap_mod("po4a::xml", 1.1676 + dgettext("po4a", 1.1677 + "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags"); 1.1678 + } 1.1679 + foreach (split(/\s+/s,$1)) { 1.1680 + $_ =~ m/^(.*?)(<.*)$/; 1.1681 + $self->{tags}->{$2} = $1 || ""; 1.1682 + } 1.1683 + 1.1684 + if ($self->{options}{'tagsonly'}) { 1.1685 + warn wrap_mod("po4a::xml", 1.1686 + dgettext("po4a", 1.1687 + "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly"); 1.1688 + } 1.1689 + 1.1690 + $self->{options}{'break'} =~ /^\s*(.*)\s*$/s; 1.1691 + foreach my $tag (split(/\s+/s,$1)) { 1.1692 + $tag =~ m/^(.*?)(<.*)$/; 1.1693 + $self->{break}->{$2} = $1 || ""; 1.1694 + } 1.1695 + $self->{options}{'_default_break'} =~ /^\s*(.*)\s*$/s; 1.1696 + foreach my $tag (split(/\s+/s,$1)) { 1.1697 + $tag =~ m/^(.*?)(<.*)$/; 1.1698 + $self->{break}->{$2} = $1 || "" 1.1699 + unless $list_nodefault{$2} 1.1700 + or defined $self->{break}->{$2}; 1.1701 + } 1.1702 + 1.1703 + $self->{options}{'translated'} =~ /^\s*(.*)\s*$/s; 1.1704 + foreach my $tag (split(/\s+/s,$1)) { 1.1705 + $tag =~ m/^(.*?)(<.*)$/; 1.1706 + $self->{translated}->{$2} = $1 || ""; 1.1707 + } 1.1708 + $self->{options}{'_default_translated'} =~ /^\s*(.*)\s*$/s; 1.1709 + foreach my $tag (split(/\s+/s,$1)) { 1.1710 + $tag =~ m/^(.*?)(<.*)$/; 1.1711 + $self->{translated}->{$2} = $1 || "" 1.1712 + unless $list_nodefault{$2} 1.1713 + or defined $self->{translated}->{$2}; 1.1714 + } 1.1715 + 1.1716 + $self->{options}{'untranslated'} =~ /^\s*(.*)\s*$/s; 1.1717 + foreach my $tag (split(/\s+/s,$1)) { 1.1718 + $tag =~ m/^(.*?)(<.*)$/; 1.1719 + $self->{untranslated}->{$2} = $1 || ""; 1.1720 + } 1.1721 + $self->{options}{'_default_untranslated'} =~ /^\s*(.*)\s*$/s; 1.1722 + foreach my $tag (split(/\s+/s,$1)) { 1.1723 + $tag =~ m/^(.*?)(<.*)$/; 1.1724 + $self->{untranslated}->{$2} = $1 || "" 1.1725 + unless $list_nodefault{$2} 1.1726 + or defined $self->{untranslated}->{$2}; 1.1727 + } 1.1728 + 1.1729 + $self->{options}{'attributes'} =~ /^\s*(.*)\s*$/s; 1.1730 + foreach my $tag (split(/\s+/s,$1)) { 1.1731 + if ($tag =~ m/^(.*?)(<.*)$/) { 1.1732 + $self->{attributes}->{$2} = $1 || ""; 1.1733 + } else { 1.1734 + $self->{attributes}->{$tag} = ""; 1.1735 + } 1.1736 + } 1.1737 + $self->{options}{'_default_attributes'} =~ /^\s*(.*)\s*$/s; 1.1738 + foreach my $tag (split(/\s+/s,$1)) { 1.1739 + if ($tag =~ m/^(.*?)(<.*)$/) { 1.1740 + $self->{attributes}->{$2} = $1 || "" 1.1741 + unless $list_nodefault{$2} 1.1742 + or defined $self->{attributes}->{$2}; 1.1743 + } else { 1.1744 + $self->{attributes}->{$tag} = "" 1.1745 + unless $list_nodefault{$tag} 1.1746 + or defined $self->{attributes}->{$tag}; 1.1747 + } 1.1748 + } 1.1749 + 1.1750 + my @list_inline; 1.1751 + $self->{options}{'inline'} =~ /^\s*(.*)\s*$/s; 1.1752 + foreach my $tag (split(/\s+/s,$1)) { 1.1753 + $tag =~ m/^(.*?)(<.*)$/; 1.1754 + $self->{inline}->{$2} = $1 || ""; 1.1755 + } 1.1756 + $self->{options}{'_default_inline'} =~ /^\s*(.*)\s*$/s; 1.1757 + foreach my $tag (split(/\s+/s,$1)) { 1.1758 + $tag =~ m/^(.*?)(<.*)$/; 1.1759 + $self->{inline}->{$2} = $1 || "" 1.1760 + unless $list_nodefault{$2} 1.1761 + or defined $self->{inline}->{$2}; 1.1762 + } 1.1763 + 1.1764 + $self->{options}{'placeholder'} =~ /^\s*(.*)\s*$/s; 1.1765 + foreach my $tag (split(/\s+/s,$1)) { 1.1766 + $tag =~ m/^(.*?)(<.*)$/; 1.1767 + $self->{placeholder}->{$2} = $1 || ""; 1.1768 + } 1.1769 + $self->{options}{'_default_placeholder'} =~ /^\s*(.*)\s*$/s; 1.1770 + foreach my $tag (split(/\s+/s,$1)) { 1.1771 + $tag =~ m/^(.*?)(<.*)$/; 1.1772 + $self->{placeholder}->{$2} = $1 || "" 1.1773 + unless $list_nodefault{$2} 1.1774 + or defined $self->{placeholder}->{$2}; 1.1775 + } 1.1776 + 1.1777 + # There should be no translated and untranslated tags 1.1778 + foreach my $tag (keys %{$self->{translated}}) { 1.1779 + die wrap_mod("po4a::xml", 1.1780 + dgettext("po4a", 1.1781 + "Tag '%s' both in the %s and %s categories."), $tag, "translated", "untranslated") 1.1782 + if defined $self->{untranslated}->{$tag}; 1.1783 + } 1.1784 + # There should be no inline, break, and placeholder tags 1.1785 + foreach my $tag (keys %{$self->{inline}}) { 1.1786 + die wrap_mod("po4a::xml", 1.1787 + dgettext("po4a", 1.1788 + "Tag '%s' both in the %s and %s categories."), $tag, "inline", "break") 1.1789 + if defined $self->{break}->{$tag}; 1.1790 + die wrap_mod("po4a::xml", 1.1791 + dgettext("po4a", 1.1792 + "Tag '%s' both in the %s and %s categories."), $tag, "inline", "placeholder") 1.1793 + if defined $self->{placeholder}->{$tag}; 1.1794 + } 1.1795 + foreach my $tag (keys %{$self->{break}}) { 1.1796 + die wrap_mod("po4a::xml", 1.1797 + dgettext("po4a", 1.1798 + "Tag '%s' both in the %s and %s categories."), $tag, "break", "placeholder") 1.1799 + if defined $self->{placeholder}->{$tag}; 1.1800 + } 1.1801 +} 1.1802 + 1.1803 +=head2 GETTING TEXT FROM THE INPUT DOCUMENT 1.1804 + 1.1805 +=over 1.1806 + 1.1807 +=item get_string_until($%) 1.1808 + 1.1809 +This function returns an array with the lines (and references) from the input 1.1810 +document until it finds the first argument. The second argument is an options 1.1811 +hash. Value 0 means disabled (the default) and 1, enabled. 1.1812 + 1.1813 +The valid options are: 1.1814 + 1.1815 +=over 4 1.1816 + 1.1817 +=item include 1.1818 + 1.1819 +This makes the returned array to contain the searched text 1.1820 + 1.1821 +=item remove 1.1822 + 1.1823 +This removes the returned stream from the input 1.1824 + 1.1825 +=item unquoted 1.1826 + 1.1827 +This ensures that the searched text is outside any quotes 1.1828 + 1.1829 +=back 1.1830 + 1.1831 +=cut 1.1832 + 1.1833 +sub get_string_until { 1.1834 + my ($self,$search) = (shift,shift); 1.1835 + my $options = shift; 1.1836 + my ($include,$remove,$unquoted, $regex) = (0,0,0,0); 1.1837 + 1.1838 + if (defined($options->{include})) { $include = $options->{include}; } 1.1839 + if (defined($options->{remove})) { $remove = $options->{remove}; } 1.1840 + if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; } 1.1841 + if (defined($options->{regex})) { $regex = $options->{regex}; } 1.1842 + 1.1843 + my ($line,$ref) = $self->shiftline(); 1.1844 + my (@text,$paragraph); 1.1845 + my ($eof,$found) = (0,0); 1.1846 + 1.1847 + $search = "\Q$search\E" unless $regex; 1.1848 + while (defined($line) and !$found) { 1.1849 + push @text, ($line,$ref); 1.1850 + $paragraph .= $line; 1.1851 + if ($unquoted) { 1.1852 + if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) { 1.1853 + $found = 1; 1.1854 + } 1.1855 + } else { 1.1856 + if ( $paragraph =~ /$search/s ) { 1.1857 + $found = 1; 1.1858 + } 1.1859 + } 1.1860 + if (!$found) { 1.1861 + ($line,$ref)=$self->shiftline(); 1.1862 + } 1.1863 + } 1.1864 + 1.1865 + if (!defined($line)) { $eof = 1; } 1.1866 + 1.1867 + if ( $found ) { 1.1868 + $line = ""; 1.1869 + if($unquoted) { 1.1870 + $paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s; 1.1871 + $line = $1; 1.1872 + $text[$#text-1] =~ s/\Q$line\E$//s; 1.1873 + } else { 1.1874 + $paragraph =~ /$search(.*)$/s; 1.1875 + $line = $1; 1.1876 + $text[$#text-1] =~ s/\Q$line\E$//s; 1.1877 + } 1.1878 + if(!$include) { 1.1879 + $text[$#text-1] =~ /^(.*)($search.*)$/s; 1.1880 + $text[$#text-1] = $1; 1.1881 + $line = $2.$line; 1.1882 + } 1.1883 + if (defined($line) and ($line ne "")) { 1.1884 + $self->unshiftline ($line,$text[$#text]); 1.1885 + } 1.1886 + } 1.1887 + if (!$remove) { 1.1888 + $self->unshiftline (@text); 1.1889 + } 1.1890 + 1.1891 + #If we get to the end of the file, we return the whole paragraph 1.1892 + return ($eof,@text); 1.1893 +} 1.1894 + 1.1895 +=item skip_spaces(\@) 1.1896 + 1.1897 +This function receives as argument the reference to a paragraph (in the format 1.1898 +returned by get_string_until), skips his heading spaces and returns them as 1.1899 +a simple string. 1.1900 + 1.1901 +=cut 1.1902 + 1.1903 +sub skip_spaces { 1.1904 + my ($self,$pstring)=@_; 1.1905 + my $space=""; 1.1906 + 1.1907 + while (@$pstring and (@$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "")) { 1.1908 + if (@$pstring[0] ne "") { 1.1909 + $space .= $1; 1.1910 + @$pstring[0] = $2; 1.1911 + } 1.1912 + 1.1913 + if (@$pstring[0] eq "") { 1.1914 + shift @$pstring; 1.1915 + shift @$pstring; 1.1916 + } 1.1917 + } 1.1918 + return $space; 1.1919 +} 1.1920 + 1.1921 +=item join_lines(@) 1.1922 + 1.1923 +This function returns a simple string with the text from the argument array 1.1924 +(discarding the references). 1.1925 + 1.1926 +=cut 1.1927 + 1.1928 +sub join_lines { 1.1929 + my ($self,@lines)=@_; 1.1930 + my ($line,$ref); 1.1931 + my $text = ""; 1.1932 + while ($#lines > 0) { 1.1933 + ($line,$ref) = (shift @lines,shift @lines); 1.1934 + $text .= $line; 1.1935 + } 1.1936 + return $text; 1.1937 +} 1.1938 + 1.1939 +=back 1.1940 + 1.1941 +=head1 STATUS OF THIS MODULE 1.1942 + 1.1943 +This module can translate tags and attributes. 1.1944 + 1.1945 +=head1 TODO LIST 1.1946 + 1.1947 +DOCTYPE (ENTITIES) 1.1948 + 1.1949 +There is a minimal support for the translation of entities. They are 1.1950 +translated as a whole, and tags are not taken into account. Multilines 1.1951 +entities are not supported and entities are always rewrapped during the 1.1952 +translation. 1.1953 + 1.1954 +MODIFY TAG TYPES FROM INHERITED MODULES 1.1955 +(move the tag_types structure inside the $self hash?) 1.1956 + 1.1957 +=head1 SEE ALSO 1.1958 + 1.1959 +L<po4a(7)|po4a.7>, L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>. 1.1960 + 1.1961 +=head1 AUTHORS 1.1962 + 1.1963 + Jordi Vilalta <jvprat@gmail.com> 1.1964 + Nicolas François <nicolas.francois@centraliens.net> 1.1965 + 1.1966 +=head1 COPYRIGHT AND LICENSE 1.1967 + 1.1968 + Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> 1.1969 + Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net> 1.1970 + 1.1971 +This program is free software; you may redistribute it and/or modify it 1.1972 +under the terms of GPL (see the COPYING file). 1.1973 + 1.1974 +=cut 1.1975 + 1.1976 +1;