hgbook

annotate tools/po4a/lib/Locale/Po4a/Xml.pm @ 627:8271c8891b0e

Add Chinese translation
author Dongsheng Song <dongsheng.song@gmail.com>
date Thu Mar 12 15:53:01 2009 +0800 (2009-03-12)
parents
children
rev   line source
dongsheng@623 1 #!/usr/bin/perl
dongsheng@623 2
dongsheng@623 3 # Po4a::Xml.pm
dongsheng@623 4 #
dongsheng@623 5 # extract and translate translatable strings from XML documents.
dongsheng@623 6 #
dongsheng@623 7 # This code extracts plain text from tags and attributes from generic
dongsheng@623 8 # XML documents, and it can be used as a base to build modules for
dongsheng@623 9 # XML-based documents.
dongsheng@623 10 #
dongsheng@623 11 # Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>
dongsheng@623 12 # Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>
dongsheng@623 13 #
dongsheng@623 14 # This program is free software; you can redistribute it and/or modify
dongsheng@623 15 # it under the terms of the GNU General Public License as published by
dongsheng@623 16 # the Free Software Foundation; either version 2 of the License, or
dongsheng@623 17 # (at your option) any later version.
dongsheng@623 18 #
dongsheng@623 19 # This program is distributed in the hope that it will be useful,
dongsheng@623 20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
dongsheng@623 21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
dongsheng@623 22 # GNU General Public License for more details.
dongsheng@623 23 #
dongsheng@623 24 # You should have received a copy of the GNU General Public License
dongsheng@623 25 # along with this program; if not, write to the Free Software
dongsheng@623 26 # Foundation, Inc.,
dongsheng@623 27 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
dongsheng@623 28 #
dongsheng@623 29 ########################################################################
dongsheng@623 30
dongsheng@623 31 =head1 NAME
dongsheng@623 32
dongsheng@623 33 Locale::Po4a::Xml - Convert XML documents and derivates from/to PO files
dongsheng@623 34
dongsheng@623 35 =head1 DESCRIPTION
dongsheng@623 36
dongsheng@623 37 The po4a (po for anything) project goal is to ease translations (and more
dongsheng@623 38 interestingly, the maintenance of translations) using gettext tools on
dongsheng@623 39 areas where they were not expected like documentation.
dongsheng@623 40
dongsheng@623 41 Locale::Po4a::Xml is a module to help the translation of XML documents into
dongsheng@623 42 other [human] languages. It can also be used as a base to build modules for
dongsheng@623 43 XML-based documents.
dongsheng@623 44
dongsheng@623 45 =cut
dongsheng@623 46
dongsheng@623 47 package Locale::Po4a::Xml;
dongsheng@623 48
dongsheng@623 49 use 5.006;
dongsheng@623 50 use strict;
dongsheng@623 51 use warnings;
dongsheng@623 52
dongsheng@623 53 require Exporter;
dongsheng@623 54 use vars qw(@ISA @EXPORT);
dongsheng@623 55 @ISA = qw(Locale::Po4a::TransTractor);
dongsheng@623 56 @EXPORT = qw(new initialize @tag_types);
dongsheng@623 57
dongsheng@623 58 use Locale::Po4a::TransTractor;
dongsheng@623 59 use Locale::Po4a::Common;
dongsheng@623 60 use Carp qw(croak);
dongsheng@623 61 use File::Basename;
dongsheng@623 62 use File::Spec;
dongsheng@623 63
dongsheng@623 64 #It will mantain the path from the root tag to the current one
dongsheng@623 65 my @path;
dongsheng@623 66
dongsheng@623 67 #It will contain a list of external entities and their attached paths
dongsheng@623 68 my %entities;
dongsheng@623 69
dongsheng@623 70 my @comments;
dongsheng@623 71
dongsheng@623 72 sub shiftline {
dongsheng@623 73 my $self = shift;
dongsheng@623 74 # call Transtractor's shiftline
dongsheng@623 75 my ($line,$ref) = $self->SUPER::shiftline();
dongsheng@623 76 return ($line,$ref) if (not defined $line);
dongsheng@623 77
dongsheng@623 78 for my $k (keys %entities) {
dongsheng@623 79 if ($line =~ m/^(.*?)&$k;(.*)$/s) {
dongsheng@623 80 my ($before, $after) = ($1, $2);
dongsheng@623 81 my $linenum=0;
dongsheng@623 82 my @textentries;
dongsheng@623 83
dongsheng@623 84 open (my $in, $entities{$k})
dongsheng@623 85 or croak wrap_mod("po4a::xml",
dongsheng@623 86 dgettext("po4a", "Can't read from %s: %s"),
dongsheng@623 87 $entities{$k}, $!);
dongsheng@623 88 while (defined (my $textline = <$in>)) {
dongsheng@623 89 $linenum++;
dongsheng@623 90 my $textref=$entities{$k}.":$linenum";
dongsheng@623 91 push @textentries, ($textline,$textref);
dongsheng@623 92 }
dongsheng@623 93 close $in
dongsheng@623 94 or croak wrap_mod("po4a::xml",
dongsheng@623 95 dgettext("po4a", "Can't close %s after reading: %s"),
dongsheng@623 96 $entities{$k}, $!);
dongsheng@623 97
dongsheng@623 98 push @textentries, ($after, $ref);
dongsheng@623 99 $line = $before.(shift @textentries);
dongsheng@623 100 $ref .= " ".(shift @textentries);
dongsheng@623 101 $self->unshiftline(@textentries);
dongsheng@623 102 }
dongsheng@623 103 }
dongsheng@623 104
dongsheng@623 105 return ($line,$ref);
dongsheng@623 106 }
dongsheng@623 107
dongsheng@623 108 sub read {
dongsheng@623 109 my ($self,$filename)=@_;
dongsheng@623 110 push @{$self->{DOCPOD}{infile}}, $filename;
dongsheng@623 111 $self->Locale::Po4a::TransTractor::read($filename);
dongsheng@623 112 }
dongsheng@623 113
dongsheng@623 114 sub parse {
dongsheng@623 115 my $self=shift;
dongsheng@623 116 map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}};
dongsheng@623 117 }
dongsheng@623 118
dongsheng@623 119 # @save_holders is a stack of references to ('paragraph', 'translation',
dongsheng@623 120 # 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where:
dongsheng@623 121 # paragraph is a reference to an array (see paragraph in the
dongsheng@623 122 # treat_content() subroutine) of strings followed by
dongsheng@623 123 # references. It contains the @paragraph array as it was
dongsheng@623 124 # before the processing was interrupted by a tag instroducing
dongsheng@623 125 # a placeholder.
dongsheng@623 126 # translation is the translation of this level up to now
dongsheng@623 127 # sub_translations is a reference to an array of strings containing the
dongsheng@623 128 # translations which must replace the placeholders.
dongsheng@623 129 # open is the tag which opened the placeholder.
dongsheng@623 130 # close is the tag which closed the placeholder.
dongsheng@623 131 # folded_attributes is an hash of tags with their attributes (<tag attrs=...>
dongsheng@623 132 # strings), referenced by the folded tag id, which should
dongsheng@623 133 # replace the <tag po4a-id=id> strings in the current
dongsheng@623 134 # translation.
dongsheng@623 135 #
dongsheng@623 136 # If @save_holders only has 1 holder, then we are not processing the
dongsheng@623 137 # content of an holder, we are translating the document.
dongsheng@623 138 my @save_holders;
dongsheng@623 139
dongsheng@623 140
dongsheng@623 141 # If we are at the bottom of the stack and there is no <placeholder ...> in
dongsheng@623 142 # the current translation, we can push the translation in the translated
dongsheng@623 143 # document.
dongsheng@623 144 # Otherwise, we keep the translation in the current holder.
dongsheng@623 145 sub pushline {
dongsheng@623 146 my ($self, $line) = (shift, shift);
dongsheng@623 147
dongsheng@623 148 my $holder = $save_holders[$#save_holders];
dongsheng@623 149 my $translation = $holder->{'translation'};
dongsheng@623 150 $translation .= $line;
dongsheng@623 151
dongsheng@623 152 while ( %{$holder->{folded_attributes}}
dongsheng@623 153 and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s) {
dongsheng@623 154 my $begin = $1;
dongsheng@623 155 my $tag = $2;
dongsheng@623 156 my $id = $3;
dongsheng@623 157 my $end = $4;
dongsheng@623 158 if (defined $holder->{folded_attributes}->{$id}) {
dongsheng@623 159 # TODO: check if the tag is the same
dongsheng@623 160 $translation = $begin.$holder->{folded_attributes}->{$id}.$end;
dongsheng@623 161 delete $holder->{folded_attributes}->{$id};
dongsheng@623 162 } else {
dongsheng@623 163 # TODO: It will be hard to identify the location.
dongsheng@623 164 # => find a way to retrieve the reference.
dongsheng@623 165 die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id);
dongsheng@623 166 }
dongsheng@623 167 }
dongsheng@623 168 # TODO: check that %folded_attributes is empty at some time
dongsheng@623 169 # => in translate_paragraph?
dongsheng@623 170
dongsheng@623 171 if ( ($#save_holders > 0)
dongsheng@623 172 or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) {
dongsheng@623 173 $holder->{'translation'} = $translation;
dongsheng@623 174 } else {
dongsheng@623 175 $self->SUPER::pushline($translation);
dongsheng@623 176 $holder->{'translation'} = '';
dongsheng@623 177 }
dongsheng@623 178 }
dongsheng@623 179
dongsheng@623 180 =head1 TRANSLATING WITH PO4A::XML
dongsheng@623 181
dongsheng@623 182 This module can be used directly to handle generic XML documents. This will
dongsheng@623 183 extract all tag's content, and no attributes, since it's where the text is
dongsheng@623 184 written in most XML based documents.
dongsheng@623 185
dongsheng@623 186 There are some options (described in the next section) that can customize
dongsheng@623 187 this behavior. If this doesn't fit to your document format you're encouraged
dongsheng@623 188 to write your own module derived from this, to describe your format's details.
dongsheng@623 189 See the section "Writing derivate modules" below, for the process description.
dongsheng@623 190
dongsheng@623 191 =cut
dongsheng@623 192
dongsheng@623 193 #
dongsheng@623 194 # Parse file and translate it
dongsheng@623 195 #
dongsheng@623 196 sub parse_file {
dongsheng@623 197 my ($self,$filename) = @_;
dongsheng@623 198 my $eof = 0;
dongsheng@623 199
dongsheng@623 200 while (!$eof) {
dongsheng@623 201 # We get all the text until the next breaking tag (not
dongsheng@623 202 # inline) and translate it
dongsheng@623 203 $eof = $self->treat_content;
dongsheng@623 204 if (!$eof) {
dongsheng@623 205 # And then we treat the following breaking tag
dongsheng@623 206 $eof = $self->treat_tag;
dongsheng@623 207 }
dongsheng@623 208 }
dongsheng@623 209 }
dongsheng@623 210
dongsheng@623 211 =head1 OPTIONS ACCEPTED BY THIS MODULE
dongsheng@623 212
dongsheng@623 213 The global debug option causes this module to show the excluded strings, in
dongsheng@623 214 order to see if it skips something important.
dongsheng@623 215
dongsheng@623 216 These are this module's particular options:
dongsheng@623 217
dongsheng@623 218 =over 4
dongsheng@623 219
dongsheng@623 220 =item B<nostrip>
dongsheng@623 221
dongsheng@623 222 Prevents it to strip the spaces around the extracted strings.
dongsheng@623 223
dongsheng@623 224 =item B<wrap>
dongsheng@623 225
dongsheng@623 226 Canonizes the string to translate, considering that whitespaces are not
dongsheng@623 227 important, and wraps the translated document. This option can be overridden
dongsheng@623 228 by custom tag options. See the "tags" option below.
dongsheng@623 229
dongsheng@623 230 =item B<caseinsensitive>
dongsheng@623 231
dongsheng@623 232 It makes the tags and attributes searching to work in a case insensitive
dongsheng@623 233 way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang.
dongsheng@623 234
dongsheng@623 235 =item B<includeexternal>
dongsheng@623 236
dongsheng@623 237 When defined, external entities are included in the generated (translated)
dongsheng@623 238 document, and for the extraction of strings. If it's not defined, you
dongsheng@623 239 will have to translate external entities separately as independent
dongsheng@623 240 documents.
dongsheng@623 241
dongsheng@623 242 =item B<ontagerror>
dongsheng@623 243
dongsheng@623 244 This option defines the behavior of the module when it encounter a invalid
dongsheng@623 245 Xml syntax (a closing tag which does not match the last opening tag, or a
dongsheng@623 246 tag's attribute without value).
dongsheng@623 247 It can take the following values:
dongsheng@623 248
dongsheng@623 249 =over
dongsheng@623 250
dongsheng@623 251 =item I<fail>
dongsheng@623 252
dongsheng@623 253 This is the default value.
dongsheng@623 254 The module will exit with an error.
dongsheng@623 255
dongsheng@623 256 =item I<warn>
dongsheng@623 257
dongsheng@623 258 The module will continue, and will issue a warning.
dongsheng@623 259
dongsheng@623 260 =item I<silent>
dongsheng@623 261
dongsheng@623 262 The module will continue without any warnings.
dongsheng@623 263
dongsheng@623 264 =back
dongsheng@623 265
dongsheng@623 266 Be careful when using this option.
dongsheng@623 267 It is generally recommended to fix the input file.
dongsheng@623 268
dongsheng@623 269 =item B<tagsonly>
dongsheng@623 270
dongsheng@623 271 Extracts only the specified tags in the "tags" option. Otherwise, it
dongsheng@623 272 will extract all the tags except the ones specified.
dongsheng@623 273
dongsheng@623 274 Note: This option is deprecated.
dongsheng@623 275
dongsheng@623 276 =item B<doctype>
dongsheng@623 277
dongsheng@623 278 String that will try to match with the first line of the document's doctype
dongsheng@623 279 (if defined). If it doesn't, a warning will indicate that the document
dongsheng@623 280 might be of a bad type.
dongsheng@623 281
dongsheng@623 282 =item B<tags>
dongsheng@623 283
dongsheng@623 284 Space-separated list of tags you want to translate or skip. By default,
dongsheng@623 285 the specified tags will be excluded, but if you use the "tagsonly" option,
dongsheng@623 286 the specified tags will be the only ones included. The tags must be in the
dongsheng@623 287 form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of
dongsheng@623 288 the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag.
dongsheng@623 289
dongsheng@623 290 You can also specify some tag options putting some characters in front of
dongsheng@623 291 the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)
dongsheng@623 292 to override the default behavior specified by the global "wrap" option.
dongsheng@623 293
dongsheng@623 294 Example: WE<lt>chapterE<gt>E<lt>titleE<gt>
dongsheng@623 295
dongsheng@623 296 Note: This option is deprecated.
dongsheng@623 297 You should use the B<translated> and B<untranslated> options instead.
dongsheng@623 298
dongsheng@623 299 =item B<attributes>
dongsheng@623 300
dongsheng@623 301 Space-separated list of tag's attributes you want to translate. You can
dongsheng@623 302 specify the attributes by their name (for example, "lang"), but you can
dongsheng@623 303 prefix it with a tag hierarchy, to specify that this attribute will only be
dongsheng@623 304 translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang
dongsheng@623 305 specifies that the lang attribute will only be translated if it's into an
dongsheng@623 306 E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag.
dongsheng@623 307
dongsheng@623 308 =item B<foldattributes>
dongsheng@623 309
dongsheng@623 310 Do not translate attributes in inline tags.
dongsheng@623 311 Instead, replace all attributes of a tag by po4a-id=<id>.
dongsheng@623 312
dongsheng@623 313 This is useful when attributes shall not be translated, as this simplifies the
dongsheng@623 314 strings for translators, and avoids typos.
dongsheng@623 315
dongsheng@623 316 =item B<break>
dongsheng@623 317
dongsheng@623 318 Space-separated list of tags which should break the sequence.
dongsheng@623 319 By default, all tags break the sequence.
dongsheng@623 320
dongsheng@623 321 The tags must be in the form <aaa>, but you can join some
dongsheng@623 322 (<bbb><aaa>), if a tag (<aaa>) should only be considered
dongsheng@623 323 when it's into another tag (<bbb>).
dongsheng@623 324
dongsheng@623 325 =item B<inline>
dongsheng@623 326
dongsheng@623 327 Space-separated list of tags which should be treated as inline.
dongsheng@623 328 By default, all tags break the sequence.
dongsheng@623 329
dongsheng@623 330 The tags must be in the form <aaa>, but you can join some
dongsheng@623 331 (<bbb><aaa>), if a tag (<aaa>) should only be considered
dongsheng@623 332 when it's into another tag (<bbb>).
dongsheng@623 333
dongsheng@623 334 =item B<placeholder>
dongsheng@623 335
dongsheng@623 336 Space-separated list of tags which should be treated as placeholders.
dongsheng@623 337 Placeholders do not break the sequence, but the content of placeholders is
dongsheng@623 338 translated separately.
dongsheng@623 339
dongsheng@623 340 The location of the placeholder in its blocks will be marked with a string
dongsheng@623 341 similar to:
dongsheng@623 342
dongsheng@623 343 <placeholder type=\"footnote\" id=\"0\"/>
dongsheng@623 344
dongsheng@623 345 The tags must be in the form <aaa>, but you can join some
dongsheng@623 346 (<bbb><aaa>), if a tag (<aaa>) should only be considered
dongsheng@623 347 when it's into another tag (<bbb>).
dongsheng@623 348
dongsheng@623 349 =item B<nodefault>
dongsheng@623 350
dongsheng@623 351 Space separated list of tags that the module should not try to set by
dongsheng@623 352 default in any category.
dongsheng@623 353
dongsheng@623 354 =item B<cpp>
dongsheng@623 355
dongsheng@623 356 Support C preprocessor directives.
dongsheng@623 357 When this option is set, po4a will consider preprocessor directives as
dongsheng@623 358 paragraph separators.
dongsheng@623 359 This is important if the XML file must be preprocessed because otherwise
dongsheng@623 360 the directives may be inserted in the middle of lines if po4a consider it
dongsheng@623 361 belong to the current paragraph, and they won't be recognized by the
dongsheng@623 362 preprocessor.
dongsheng@623 363 Note: the preprocessor directives must only appear between tags
dongsheng@623 364 (they must not break a tag).
dongsheng@623 365
dongsheng@623 366 =item B<translated>
dongsheng@623 367
dongsheng@623 368 Space-separated list of tags you want to translate.
dongsheng@623 369
dongsheng@623 370 The tags must be in the form <aaa>, but you can join some
dongsheng@623 371 (<bbb><aaa>), if a tag (<aaa>) should only be considered
dongsheng@623 372 when it's into another tag (<bbb>).
dongsheng@623 373
dongsheng@623 374 You can also specify some tag options putting some characters in front of
dongsheng@623 375 the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)
dongsheng@623 376 to overide the default behavior specified by the global "wrap" option.
dongsheng@623 377
dongsheng@623 378 Example: WE<lt>chapterE<gt>E<lt>titleE<gt>
dongsheng@623 379
dongsheng@623 380 =item B<untranslated>
dongsheng@623 381
dongsheng@623 382 Space-separated list of tags you do not want to translate.
dongsheng@623 383
dongsheng@623 384 The tags must be in the form <aaa>, but you can join some
dongsheng@623 385 (<bbb><aaa>), if a tag (<aaa>) should only be considered
dongsheng@623 386 when it's into another tag (<bbb>).
dongsheng@623 387
dongsheng@623 388 =item B<defaulttranslateoption>
dongsheng@623 389
dongsheng@623 390 The default categories for tags that are not in any of the translated,
dongsheng@623 391 untranslated, break, inline, or placeholder.
dongsheng@623 392
dongsheng@623 393 This is a set of letters:
dongsheng@623 394
dongsheng@623 395 =over
dongsheng@623 396
dongsheng@623 397 =item I<w>
dongsheng@623 398
dongsheng@623 399 Tags should be translated and content can be re-wrapped.
dongsheng@623 400
dongsheng@623 401 =item I<W>
dongsheng@623 402
dongsheng@623 403 Tags should be translated and content should not be re-wrapped.
dongsheng@623 404
dongsheng@623 405 =item I<i>
dongsheng@623 406
dongsheng@623 407 Tags should be translated inline.
dongsheng@623 408
dongsheng@623 409 =item I<p>
dongsheng@623 410
dongsheng@623 411 Tags should be translated as placeholders.
dongsheng@623 412
dongsheng@623 413 =back
dongsheng@623 414
dongsheng@623 415 =back
dongsheng@623 416
dongsheng@623 417 =cut
dongsheng@623 418 # TODO: defaulttranslateoption
dongsheng@623 419 # w => indicate that it is only valid for translatable tags and do not
dongsheng@623 420 # care about inline/break/placeholder?
dongsheng@623 421 # ...
dongsheng@623 422
dongsheng@623 423 sub initialize {
dongsheng@623 424 my $self = shift;
dongsheng@623 425 my %options = @_;
dongsheng@623 426
dongsheng@623 427 # Reset the path
dongsheng@623 428 @path = ();
dongsheng@623 429
dongsheng@623 430 # Initialize the stack of holders
dongsheng@623 431 my @paragraph = ();
dongsheng@623 432 my @sub_translations = ();
dongsheng@623 433 my %folded_attributes;
dongsheng@623 434 my %holder = ('paragraph' => \@paragraph,
dongsheng@623 435 'translation' => "",
dongsheng@623 436 'sub_translations' => \@sub_translations,
dongsheng@623 437 'folded_attributes' => \%folded_attributes);
dongsheng@623 438 @save_holders = (\%holder);
dongsheng@623 439
dongsheng@623 440 $self->{options}{'nostrip'}=0;
dongsheng@623 441 $self->{options}{'wrap'}=0;
dongsheng@623 442 $self->{options}{'caseinsensitive'}=0;
dongsheng@623 443 $self->{options}{'tagsonly'}=0;
dongsheng@623 444 $self->{options}{'tags'}='';
dongsheng@623 445 $self->{options}{'break'}='';
dongsheng@623 446 $self->{options}{'translated'}='';
dongsheng@623 447 $self->{options}{'untranslated'}='';
dongsheng@623 448 $self->{options}{'defaulttranslateoption'}='';
dongsheng@623 449 $self->{options}{'attributes'}='';
dongsheng@623 450 $self->{options}{'foldattributes'}=0;
dongsheng@623 451 $self->{options}{'inline'}='';
dongsheng@623 452 $self->{options}{'placeholder'}='';
dongsheng@623 453 $self->{options}{'doctype'}='';
dongsheng@623 454 $self->{options}{'nodefault'}='';
dongsheng@623 455 $self->{options}{'includeexternal'}=0;
dongsheng@623 456 $self->{options}{'ontagerror'}="fail";
dongsheng@623 457 $self->{options}{'cpp'}=0;
dongsheng@623 458
dongsheng@623 459 $self->{options}{'verbose'}='';
dongsheng@623 460 $self->{options}{'debug'}='';
dongsheng@623 461
dongsheng@623 462 foreach my $opt (keys %options) {
dongsheng@623 463 if ($options{$opt}) {
dongsheng@623 464 die wrap_mod("po4a::xml",
dongsheng@623 465 dgettext("po4a", "Unknown option: %s"), $opt)
dongsheng@623 466 unless exists $self->{options}{$opt};
dongsheng@623 467 $self->{options}{$opt} = $options{$opt};
dongsheng@623 468 }
dongsheng@623 469 }
dongsheng@623 470 # Default options set by modules. Forbidden for users.
dongsheng@623 471 $self->{options}{'_default_translated'}='';
dongsheng@623 472 $self->{options}{'_default_untranslated'}='';
dongsheng@623 473 $self->{options}{'_default_break'}='';
dongsheng@623 474 $self->{options}{'_default_inline'}='';
dongsheng@623 475 $self->{options}{'_default_placeholder'}='';
dongsheng@623 476 $self->{options}{'_default_attributes'}='';
dongsheng@623 477
dongsheng@623 478 #It will maintain the list of the translatable tags
dongsheng@623 479 $self->{tags}=();
dongsheng@623 480 $self->{translated}=();
dongsheng@623 481 $self->{untranslated}=();
dongsheng@623 482 #It will maintain the list of the translatable attributes
dongsheng@623 483 $self->{attributes}=();
dongsheng@623 484 #It will maintain the list of the breaking tags
dongsheng@623 485 $self->{break}=();
dongsheng@623 486 #It will maintain the list of the inline tags
dongsheng@623 487 $self->{inline}=();
dongsheng@623 488 #It will maintain the list of the placeholder tags
dongsheng@623 489 $self->{placeholder}=();
dongsheng@623 490 #list of the tags that must not be set in the tags or inline category
dongsheng@623 491 #by this module or sub-module (unless specified in an option)
dongsheng@623 492 $self->{nodefault}=();
dongsheng@623 493
dongsheng@623 494 $self->treat_options;
dongsheng@623 495 }
dongsheng@623 496
dongsheng@623 497 =head1 WRITING DERIVATE MODULES
dongsheng@623 498
dongsheng@623 499 =head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE
dongsheng@623 500
dongsheng@623 501 The simplest customization is to define which tags and attributes you want
dongsheng@623 502 the parser to translate. This should be done in the initialize function.
dongsheng@623 503 First you should call the main initialize, to get the command-line options,
dongsheng@623 504 and then, append your custom definitions to the options hash. If you want
dongsheng@623 505 to treat some new options from command line, you should define them before
dongsheng@623 506 calling the main initialize:
dongsheng@623 507
dongsheng@623 508 $self->{options}{'new_option'}='';
dongsheng@623 509 $self->SUPER::initialize(%options);
dongsheng@623 510 $self->{options}{'_default_translated'}.=' <p> <head><title>';
dongsheng@623 511 $self->{options}{'attributes'}.=' <p>lang id';
dongsheng@623 512 $self->{options}{'_default_inline'}.=' <br>';
dongsheng@623 513 $self->treat_options;
dongsheng@623 514
dongsheng@623 515 You should use the B<_default_inline>, B<_default_break>,
dongsheng@623 516 B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>,
dongsheng@623 517 and B<_default_attributes> options in derivated modules. This allow users
dongsheng@623 518 to override the default behavior defined in your module with command line
dongsheng@623 519 options.
dongsheng@623 520
dongsheng@623 521 =head2 OVERRIDING THE found_string FUNCTION
dongsheng@623 522
dongsheng@623 523 Another simple step is to override the function "found_string", which
dongsheng@623 524 receives the extracted strings from the parser, in order to translate them.
dongsheng@623 525 There you can control which strings you want to translate, and perform
dongsheng@623 526 transformations to them before or after the translation itself.
dongsheng@623 527
dongsheng@623 528 It receives the extracted text, the reference on where it was, and a hash
dongsheng@623 529 that contains extra information to control what strings to translate, how
dongsheng@623 530 to translate them and to generate the comment.
dongsheng@623 531
dongsheng@623 532 The content of these options depends on the kind of string it is (specified in an
dongsheng@623 533 entry of this hash):
dongsheng@623 534
dongsheng@623 535 =over
dongsheng@623 536
dongsheng@623 537 =item type="tag"
dongsheng@623 538
dongsheng@623 539 The found string is the content of a translatable tag. The entry "tag_options"
dongsheng@623 540 contains the option characters in front of the tag hierarchy in the module
dongsheng@623 541 "tags" option.
dongsheng@623 542
dongsheng@623 543 =item type="attribute"
dongsheng@623 544
dongsheng@623 545 Means that the found string is the value of a translatable attribute. The
dongsheng@623 546 entry "attribute" has the name of the attribute.
dongsheng@623 547
dongsheng@623 548 =back
dongsheng@623 549
dongsheng@623 550 It must return the text that will replace the original in the translated
dongsheng@623 551 document. Here's a basic example of this function:
dongsheng@623 552
dongsheng@623 553 sub found_string {
dongsheng@623 554 my ($self,$text,$ref,$options)=@_;
dongsheng@623 555 $text = $self->translate($text,$ref,"type ".$options->{'type'},
dongsheng@623 556 'wrap'=>$self->{options}{'wrap'});
dongsheng@623 557 return $text;
dongsheng@623 558 }
dongsheng@623 559
dongsheng@623 560 There's another simple example in the new Dia module, which only filters
dongsheng@623 561 some strings.
dongsheng@623 562
dongsheng@623 563 =cut
dongsheng@623 564
dongsheng@623 565 sub found_string {
dongsheng@623 566 my ($self,$text,$ref,$options)=@_;
dongsheng@623 567
dongsheng@623 568 if ($text =~ m/^\s*$/s) {
dongsheng@623 569 return $text;
dongsheng@623 570 }
dongsheng@623 571
dongsheng@623 572 my $comment;
dongsheng@623 573 my $wrap = $self->{options}{'wrap'};
dongsheng@623 574
dongsheng@623 575 if ($options->{'type'} eq "tag") {
dongsheng@623 576 $comment = "Content of: ".$self->get_path;
dongsheng@623 577
dongsheng@623 578 if($options->{'tag_options'} =~ /w/) {
dongsheng@623 579 $wrap = 1;
dongsheng@623 580 }
dongsheng@623 581 if($options->{'tag_options'} =~ /W/) {
dongsheng@623 582 $wrap = 0;
dongsheng@623 583 }
dongsheng@623 584 } elsif ($options->{'type'} eq "attribute") {
dongsheng@623 585 $comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path;
dongsheng@623 586 } elsif ($options->{'type'} eq "CDATA") {
dongsheng@623 587 $comment = "CDATA";
dongsheng@623 588 $wrap = 0;
dongsheng@623 589 } else {
dongsheng@623 590 die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'});
dongsheng@623 591 }
dongsheng@623 592 $text = $self->translate($text,$ref,$comment,'wrap'=>$wrap, comment => $options->{'comments'});
dongsheng@623 593 return $text;
dongsheng@623 594 }
dongsheng@623 595
dongsheng@623 596 =head2 MODIFYING TAG TYPES (TODO)
dongsheng@623 597
dongsheng@623 598 This is a more complex one, but it enables a (almost) total customization.
dongsheng@623 599 It's based in a list of hashes, each one defining a tag type's behavior. The
dongsheng@623 600 list should be sorted so that the most general tags are after the most
dongsheng@623 601 concrete ones (sorted first by the beginning and then by the end keys). To
dongsheng@623 602 define a tag type you'll have to make a hash with the following keys:
dongsheng@623 603
dongsheng@623 604 =over 4
dongsheng@623 605
dongsheng@623 606 =item beginning
dongsheng@623 607
dongsheng@623 608 Specifies the beginning of the tag, after the "E<lt>".
dongsheng@623 609
dongsheng@623 610 =item end
dongsheng@623 611
dongsheng@623 612 Specifies the end of the tag, before the "E<gt>".
dongsheng@623 613
dongsheng@623 614 =item breaking
dongsheng@623 615
dongsheng@623 616 It says if this is a breaking tag class. A non-breaking (inline) tag is one
dongsheng@623 617 that can be taken as part of the content of another tag. It can take the
dongsheng@623 618 values false (0), true (1) or undefined. If you leave this undefined, you'll
dongsheng@623 619 have to define the f_breaking function that will say whether a concrete tag of
dongsheng@623 620 this class is a breaking tag or not.
dongsheng@623 621
dongsheng@623 622 =item f_breaking
dongsheng@623 623
dongsheng@623 624 It's a function that will tell if the next tag is a breaking one or not. It
dongsheng@623 625 should be defined if the "breaking" option is not.
dongsheng@623 626
dongsheng@623 627 =item f_extract
dongsheng@623 628
dongsheng@623 629 If you leave this key undefined, the generic extraction function will have to
dongsheng@623 630 extract the tag itself. It's useful for tags that can have other tags or
dongsheng@623 631 special structures in them, so that the main parser doesn't get mad. This
dongsheng@623 632 function receives a boolean that says if the tag should be removed from the
dongsheng@623 633 input stream or not.
dongsheng@623 634
dongsheng@623 635 =item f_translate
dongsheng@623 636
dongsheng@623 637 This function receives the tag (in the get_string_until() format) and returns
dongsheng@623 638 the translated tag (translated attributes or all needed transformations) as a
dongsheng@623 639 single string.
dongsheng@623 640
dongsheng@623 641 =back
dongsheng@623 642
dongsheng@623 643 =cut
dongsheng@623 644
dongsheng@623 645 ##### Generic XML tag types #####'
dongsheng@623 646
dongsheng@623 647 our @tag_types = (
dongsheng@623 648 { beginning => "!--#",
dongsheng@623 649 end => "--",
dongsheng@623 650 breaking => 0,
dongsheng@623 651 f_extract => \&tag_extract_comment,
dongsheng@623 652 f_translate => \&tag_trans_comment},
dongsheng@623 653 { beginning => "!--",
dongsheng@623 654 end => "--",
dongsheng@623 655 breaking => 0,
dongsheng@623 656 f_extract => \&tag_extract_comment,
dongsheng@623 657 f_translate => \&tag_trans_comment},
dongsheng@623 658 { beginning => "?xml",
dongsheng@623 659 end => "?",
dongsheng@623 660 breaking => 1,
dongsheng@623 661 f_translate => \&tag_trans_xmlhead},
dongsheng@623 662 { beginning => "?",
dongsheng@623 663 end => "?",
dongsheng@623 664 breaking => 1,
dongsheng@623 665 f_translate => \&tag_trans_procins},
dongsheng@623 666 { beginning => "!DOCTYPE",
dongsheng@623 667 end => "",
dongsheng@623 668 breaking => 1,
dongsheng@623 669 f_extract => \&tag_extract_doctype,
dongsheng@623 670 f_translate => \&tag_trans_doctype},
dongsheng@623 671 { beginning => "![CDATA[",
dongsheng@623 672 end => "",
dongsheng@623 673 breaking => 1,
dongsheng@623 674 f_extract => \&CDATA_extract,
dongsheng@623 675 f_translate => \&CDATA_trans},
dongsheng@623 676 { beginning => "/",
dongsheng@623 677 end => "",
dongsheng@623 678 f_breaking => \&tag_break_close,
dongsheng@623 679 f_translate => \&tag_trans_close},
dongsheng@623 680 { beginning => "",
dongsheng@623 681 end => "/",
dongsheng@623 682 f_breaking => \&tag_break_alone,
dongsheng@623 683 f_translate => \&tag_trans_alone},
dongsheng@623 684 { beginning => "",
dongsheng@623 685 end => "",
dongsheng@623 686 f_breaking => \&tag_break_open,
dongsheng@623 687 f_translate => \&tag_trans_open}
dongsheng@623 688 );
dongsheng@623 689
dongsheng@623 690 sub tag_extract_comment {
dongsheng@623 691 my ($self,$remove)=(shift,shift);
dongsheng@623 692 my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove});
dongsheng@623 693 return ($eof,@tag);
dongsheng@623 694 }
dongsheng@623 695
dongsheng@623 696 sub tag_trans_comment {
dongsheng@623 697 my ($self,@tag)=@_;
dongsheng@623 698 return $self->join_lines(@tag);
dongsheng@623 699 }
dongsheng@623 700
dongsheng@623 701 sub tag_trans_xmlhead {
dongsheng@623 702 my ($self,@tag)=@_;
dongsheng@623 703
dongsheng@623 704 # We don't have to translate anything from here: throw away references
dongsheng@623 705 my $tag = $self->join_lines(@tag);
dongsheng@623 706 $tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s;
dongsheng@623 707 my $in_charset=$3;
dongsheng@623 708 $self->detected_charset($in_charset);
dongsheng@623 709 my $out_charset=$self->get_out_charset;
dongsheng@623 710
dongsheng@623 711 if (defined $in_charset) {
dongsheng@623 712 $tag =~ s/$in_charset/$out_charset/;
dongsheng@623 713 } else {
dongsheng@623 714 if ($tag =~ m/standalone/) {
dongsheng@623 715 $tag =~ s/(standalone)/encoding="$out_charset" $1/;
dongsheng@623 716 } else {
dongsheng@623 717 $tag.= " encoding=\"$out_charset\"";
dongsheng@623 718 }
dongsheng@623 719 }
dongsheng@623 720
dongsheng@623 721 return $tag;
dongsheng@623 722 }
dongsheng@623 723
dongsheng@623 724 sub tag_trans_procins {
dongsheng@623 725 my ($self,@tag)=@_;
dongsheng@623 726 return $self->join_lines(@tag);
dongsheng@623 727 }
dongsheng@623 728
dongsheng@623 729 sub tag_extract_doctype {
dongsheng@623 730 my ($self,$remove)=(shift,shift);
dongsheng@623 731
dongsheng@623 732 # Check if there is an internal subset (between []).
dongsheng@623 733 my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1});
dongsheng@623 734 my $parity = 0;
dongsheng@623 735 my $paragraph = "";
dongsheng@623 736 map { $parity = 1 - $parity; $paragraph.= $parity?$_:""; } @tag;
dongsheng@623 737 my $found = 0;
dongsheng@623 738 if ($paragraph =~ m/<.*\[.*</s) {
dongsheng@623 739 $found = 1
dongsheng@623 740 }
dongsheng@623 741
dongsheng@623 742 if (not $found) {
dongsheng@623 743 ($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});
dongsheng@623 744 } else {
dongsheng@623 745 ($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1});
dongsheng@623 746 }
dongsheng@623 747 return ($eof,@tag);
dongsheng@623 748 }
dongsheng@623 749
dongsheng@623 750 sub tag_trans_doctype {
dongsheng@623 751 # This check is not really reliable. There are system and public
dongsheng@623 752 # identifiers. Only the public one could be checked reliably.
dongsheng@623 753 my ($self,@tag)=@_;
dongsheng@623 754 if (defined $self->{options}{'doctype'} ) {
dongsheng@623 755 my $doctype = $self->{options}{'doctype'};
dongsheng@623 756 if ( $tag[0] !~ /\Q$doctype\E/i ) {
dongsheng@623 757 warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype);
dongsheng@623 758 }
dongsheng@623 759 }
dongsheng@623 760 my $i = 0;
dongsheng@623 761 my $basedir = $tag[1];
dongsheng@623 762 $basedir =~ s/:[0-9]+$//;
dongsheng@623 763 $basedir = dirname($basedir);
dongsheng@623 764
dongsheng@623 765 while ( $i < $#tag ) {
dongsheng@623 766 my $t = $tag[$i];
dongsheng@623 767 my $ref = $tag[$i+1];
dongsheng@623 768 if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) {
dongsheng@623 769 my $part1 = $1;
dongsheng@623 770 my $part2 = $2;
dongsheng@623 771 my $includenow = 0;
dongsheng@623 772 my $file = 0;
dongsheng@623 773 my $name = "";
dongsheng@623 774 if ($part2 =~ /^(%\s+)(.*)$/s ) {
dongsheng@623 775 $part1.= $1;
dongsheng@623 776 $part2 = $2;
dongsheng@623 777 $includenow = 1;
dongsheng@623 778 }
dongsheng@623 779 $part2 =~ /^(\S+)(\s+)(.*)$/s;
dongsheng@623 780 $name = $1;
dongsheng@623 781 $part1.= $1.$2;
dongsheng@623 782 $part2 = $3;
dongsheng@623 783 if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) {
dongsheng@623 784 $part1.= $1;
dongsheng@623 785 $part2 = $2;
dongsheng@623 786 $file = 1;
dongsheng@623 787 if ($self->{options}{'includeexternal'}) {
dongsheng@623 788 $entities{$name} = $part2;
dongsheng@623 789 $entities{$name} =~ s/^"?(.*?)".*$/$1/s;
dongsheng@623 790 $entities{$name} = File::Spec->catfile($basedir, $entities{$name});
dongsheng@623 791 }
dongsheng@623 792 }
dongsheng@623 793 if ((not $file) and (not $includenow)) {
dongsheng@623 794 if ($part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s) {
dongsheng@623 795 my $comment = "Content of the $name entity";
dongsheng@623 796 my $quote = $1;
dongsheng@623 797 my $text = $2;
dongsheng@623 798 $part2 = $3;
dongsheng@623 799 $text = $self->translate($text,
dongsheng@623 800 $ref,
dongsheng@623 801 $comment,
dongsheng@623 802 'wrap'=>1);
dongsheng@623 803 $t = $part1."$quote$text$quote$part2";
dongsheng@623 804 }
dongsheng@623 805 }
dongsheng@623 806 # print $part1."\n";
dongsheng@623 807 # print $name."\n";
dongsheng@623 808 # print $part2."\n";
dongsheng@623 809 }
dongsheng@623 810 $tag[$i] = $t;
dongsheng@623 811 $i += 2;
dongsheng@623 812 }
dongsheng@623 813 return $self->join_lines(@tag);
dongsheng@623 814 }
dongsheng@623 815
dongsheng@623 816 sub tag_break_close {
dongsheng@623 817 my ($self,@tag)=@_;
dongsheng@623 818 my $struct = $self->get_path;
dongsheng@623 819 my $options = $self->get_translate_options($struct);
dongsheng@623 820 if ($options =~ m/[ip]/) {
dongsheng@623 821 return 0;
dongsheng@623 822 } else {
dongsheng@623 823 return 1;
dongsheng@623 824 }
dongsheng@623 825 }
dongsheng@623 826
dongsheng@623 827 sub tag_trans_close {
dongsheng@623 828 my ($self,@tag)=@_;
dongsheng@623 829 my $name = $self->get_tag_name(@tag);
dongsheng@623 830
dongsheng@623 831 my $test = pop @path;
dongsheng@623 832 if (!defined($test) || $test ne $name ) {
dongsheng@623 833 my $ontagerror = $self->{options}{'ontagerror'};
dongsheng@623 834 if ($ontagerror eq "warn") {
dongsheng@623 835 warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);
dongsheng@623 836 } elsif ($ontagerror ne "silent") {
dongsheng@623 837 die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);
dongsheng@623 838 }
dongsheng@623 839 }
dongsheng@623 840 return $self->join_lines(@tag);
dongsheng@623 841 }
dongsheng@623 842
dongsheng@623 843 sub CDATA_extract {
dongsheng@623 844 my ($self,$remove)=(shift,shift);
dongsheng@623 845 my ($eof, @tag) = $self->get_string_until(']]>',{include=>1,unquoted=>0,remove=>$remove});
dongsheng@623 846
dongsheng@623 847 return ($eof, @tag);
dongsheng@623 848 }
dongsheng@623 849
dongsheng@623 850 sub CDATA_trans {
dongsheng@623 851 my ($self,@tag)=@_;
dongsheng@623 852 return $self->found_string($self->join_lines(@tag),
dongsheng@623 853 $tag[1],
dongsheng@623 854 {'type' => "CDATA"});
dongsheng@623 855 }
dongsheng@623 856
dongsheng@623 857 sub tag_break_alone {
dongsheng@623 858 my ($self,@tag)=@_;
dongsheng@623 859 my $struct = $self->get_path($self->get_tag_name(@tag));
dongsheng@623 860 if ($self->get_translate_options($struct) =~ m/i/) {
dongsheng@623 861 return 0;
dongsheng@623 862 } else {
dongsheng@623 863 return 1;
dongsheng@623 864 }
dongsheng@623 865 }
dongsheng@623 866
dongsheng@623 867 sub tag_trans_alone {
dongsheng@623 868 my ($self,@tag)=@_;
dongsheng@623 869 my $name = $self->get_tag_name(@tag);
dongsheng@623 870 push @path, $name;
dongsheng@623 871
dongsheng@623 872 $name = $self->treat_attributes(@tag);
dongsheng@623 873
dongsheng@623 874 pop @path;
dongsheng@623 875 return $name;
dongsheng@623 876 }
dongsheng@623 877
dongsheng@623 878 sub tag_break_open {
dongsheng@623 879 my ($self,@tag)=@_;
dongsheng@623 880 my $struct = $self->get_path($self->get_tag_name(@tag));
dongsheng@623 881 my $options = $self->get_translate_options($struct);
dongsheng@623 882 if ($options =~ m/[ip]/) {
dongsheng@623 883 return 0;
dongsheng@623 884 } else {
dongsheng@623 885 return 1;
dongsheng@623 886 }
dongsheng@623 887 }
dongsheng@623 888
dongsheng@623 889 sub tag_trans_open {
dongsheng@623 890 my ($self,@tag)=@_;
dongsheng@623 891 my $name = $self->get_tag_name(@tag);
dongsheng@623 892 push @path, $name;
dongsheng@623 893
dongsheng@623 894 $name = $self->treat_attributes(@tag);
dongsheng@623 895
dongsheng@623 896 return $name;
dongsheng@623 897 }
dongsheng@623 898
dongsheng@623 899 ##### END of Generic XML tag types #####
dongsheng@623 900
dongsheng@623 901 =head1 INTERNAL FUNCTIONS used to write derivated parsers
dongsheng@623 902
dongsheng@623 903 =head2 WORKING WITH TAGS
dongsheng@623 904
dongsheng@623 905 =over 4
dongsheng@623 906
dongsheng@623 907 =item get_path()
dongsheng@623 908
dongsheng@623 909 This function returns the path to the current tag from the document's root,
dongsheng@623 910 in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>.
dongsheng@623 911
dongsheng@623 912 An additional array of tags (without brackets) can be passed in argument.
dongsheng@623 913 These path elements are added to the end of the current path.
dongsheng@623 914
dongsheng@623 915 =cut
dongsheng@623 916
dongsheng@623 917 sub get_path {
dongsheng@623 918 my $self = shift;
dongsheng@623 919 my @add = @_;
dongsheng@623 920 if ( @path > 0 or @add > 0 ) {
dongsheng@623 921 return "<".join("><",@path,@add).">";
dongsheng@623 922 } else {
dongsheng@623 923 return "outside any tag (error?)";
dongsheng@623 924 }
dongsheng@623 925 }
dongsheng@623 926
dongsheng@623 927 =item tag_type()
dongsheng@623 928
dongsheng@623 929 This function returns the index from the tag_types list that fits to the next
dongsheng@623 930 tag in the input stream, or -1 if it's at the end of the input file.
dongsheng@623 931
dongsheng@623 932 =cut
dongsheng@623 933
dongsheng@623 934 sub tag_type {
dongsheng@623 935 my $self = shift;
dongsheng@623 936 my ($line,$ref) = $self->shiftline();
dongsheng@623 937 my ($match1,$match2);
dongsheng@623 938 my $found = 0;
dongsheng@623 939 my $i = 0;
dongsheng@623 940
dongsheng@623 941 if (!defined($line)) { return -1; }
dongsheng@623 942
dongsheng@623 943 $self->unshiftline($line,$ref);
dongsheng@623 944 my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1});
dongsheng@623 945 my $line2 = $self->join_lines(@lines);
dongsheng@623 946 while (!$found && $i < @tag_types) {
dongsheng@623 947 ($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end});
dongsheng@623 948 if ($line =~ /^<\Q$match1\E/) {
dongsheng@623 949 if (!defined($tag_types[$i]->{f_extract})) {
dongsheng@623 950 #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n";
dongsheng@623 951 if (defined($line2) and $line2 =~ /\Q$match2\E>$/) {
dongsheng@623 952 $found = 1;
dongsheng@623 953 #print "YES: <".$match1." ".$match2.">\n";
dongsheng@623 954 } else {
dongsheng@623 955 #print "NO: <".$match1." ".$match2.">\n";
dongsheng@623 956 $i++;
dongsheng@623 957 }
dongsheng@623 958 } else {
dongsheng@623 959 $found = 1;
dongsheng@623 960 }
dongsheng@623 961 } else {
dongsheng@623 962 $i++;
dongsheng@623 963 }
dongsheng@623 964 }
dongsheng@623 965 if (!$found) {
dongsheng@623 966 #It should never enter here, unless you undefine the most
dongsheng@623 967 #general tags (as <...>)
dongsheng@623 968 die "po4a::xml: Unknown tag type: ".$line."\n";
dongsheng@623 969 } else {
dongsheng@623 970 return $i;
dongsheng@623 971 }
dongsheng@623 972 }
dongsheng@623 973
dongsheng@623 974 =item extract_tag($$)
dongsheng@623 975
dongsheng@623 976 This function returns the next tag from the input stream without the beginning
dongsheng@623 977 and end, in an array form, to maintain the references from the input file. It
dongsheng@623 978 has two parameters: the type of the tag (as returned by tag_type) and a
dongsheng@623 979 boolean, that indicates if it should be removed from the input stream.
dongsheng@623 980
dongsheng@623 981 =cut
dongsheng@623 982
dongsheng@623 983 sub extract_tag {
dongsheng@623 984 my ($self,$type,$remove) = (shift,shift,shift);
dongsheng@623 985 my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});
dongsheng@623 986 my ($eof,@tag);
dongsheng@623 987 if (defined($tag_types[$type]->{f_extract})) {
dongsheng@623 988 ($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove);
dongsheng@623 989 } else {
dongsheng@623 990 ($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1});
dongsheng@623 991 }
dongsheng@623 992 $tag[0] =~ /^<\Q$match1\E(.*)$/s;
dongsheng@623 993 $tag[0] = $1;
dongsheng@623 994 $tag[$#tag-1] =~ /^(.*)\Q$match2\E>$/s;
dongsheng@623 995 $tag[$#tag-1] = $1;
dongsheng@623 996 return ($eof,@tag);
dongsheng@623 997 }
dongsheng@623 998
dongsheng@623 999 =item get_tag_name(@)
dongsheng@623 1000
dongsheng@623 1001 This function returns the name of the tag passed as an argument, in the array
dongsheng@623 1002 form returned by extract_tag.
dongsheng@623 1003
dongsheng@623 1004 =cut
dongsheng@623 1005
dongsheng@623 1006 sub get_tag_name {
dongsheng@623 1007 my ($self,@tag)=@_;
dongsheng@623 1008 $tag[0] =~ /^(\S*)/;
dongsheng@623 1009 return $1;
dongsheng@623 1010 }
dongsheng@623 1011
dongsheng@623 1012 =item breaking_tag()
dongsheng@623 1013
dongsheng@623 1014 This function returns a boolean that says if the next tag in the input stream
dongsheng@623 1015 is a breaking tag or not (inline tag). It leaves the input stream intact.
dongsheng@623 1016
dongsheng@623 1017 =cut
dongsheng@623 1018
dongsheng@623 1019 sub breaking_tag {
dongsheng@623 1020 my $self = shift;
dongsheng@623 1021 my $break;
dongsheng@623 1022
dongsheng@623 1023 my $type = $self->tag_type;
dongsheng@623 1024 if ($type == -1) { return 0; }
dongsheng@623 1025
dongsheng@623 1026 #print "TAG TYPE = ".$type."\n";
dongsheng@623 1027 $break = $tag_types[$type]->{breaking};
dongsheng@623 1028 if (!defined($break)) {
dongsheng@623 1029 # This tag's breaking depends on its content
dongsheng@623 1030 my ($eof,@lines) = $self->extract_tag($type,0);
dongsheng@623 1031 $break = &{$tag_types[$type]->{f_breaking}}($self,@lines);
dongsheng@623 1032 }
dongsheng@623 1033 #print "break = ".$break."\n";
dongsheng@623 1034 return $break;
dongsheng@623 1035 }
dongsheng@623 1036
dongsheng@623 1037 =item treat_tag()
dongsheng@623 1038
dongsheng@623 1039 This function translates the next tag from the input stream. Using each
dongsheng@623 1040 tag type's custom translation functions.
dongsheng@623 1041
dongsheng@623 1042 =cut
dongsheng@623 1043
dongsheng@623 1044 sub treat_tag {
dongsheng@623 1045 my $self = shift;
dongsheng@623 1046 my $type = $self->tag_type;
dongsheng@623 1047
dongsheng@623 1048 my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end});
dongsheng@623 1049 my ($eof,@lines) = $self->extract_tag($type,1);
dongsheng@623 1050
dongsheng@623 1051 $lines[0] =~ /^(\s*)(.*)$/s;
dongsheng@623 1052 my $space1 = $1;
dongsheng@623 1053 $lines[0] = $2;
dongsheng@623 1054 $lines[$#lines-1] =~ /^(.*?)(\s*)$/s;
dongsheng@623 1055 my $space2 = $2;
dongsheng@623 1056 $lines[$#lines-1] = $1;
dongsheng@623 1057
dongsheng@623 1058 # Calling this tag type's specific handling (translation of
dongsheng@623 1059 # attributes...)
dongsheng@623 1060 my $line = &{$tag_types[$type]->{f_translate}}($self,@lines);
dongsheng@623 1061 $self->pushline("<".$match1.$space1.$line.$space2.$match2.">");
dongsheng@623 1062 return $eof;
dongsheng@623 1063 }
dongsheng@623 1064
dongsheng@623 1065 =item tag_in_list($@)
dongsheng@623 1066
dongsheng@623 1067 This function returns a string value that says if the first argument (a tag
dongsheng@623 1068 hierarchy) matches any of the tags from the second argument (a list of tags
dongsheng@623 1069 or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the
dongsheng@623 1070 matched tag's options (the characters in front of the tag) or 1 (if that tag
dongsheng@623 1071 doesn't have options).
dongsheng@623 1072
dongsheng@623 1073 =back
dongsheng@623 1074
dongsheng@623 1075 =cut
dongsheng@623 1076 sub tag_in_list ($$$) {
dongsheng@623 1077 my ($self,$path,$list) = @_;
dongsheng@623 1078 if ($self->{options}{'caseinsensitive'}) {
dongsheng@623 1079 $path = lc $path;
dongsheng@623 1080 }
dongsheng@623 1081
dongsheng@623 1082 while (1) {
dongsheng@623 1083 if (defined $list->{$path}) {
dongsheng@623 1084 if (length $list->{$path}) {
dongsheng@623 1085 return $list->{$path};
dongsheng@623 1086 } else {
dongsheng@623 1087 return 1;
dongsheng@623 1088 }
dongsheng@623 1089 }
dongsheng@623 1090 last unless ($path =~ m/</);
dongsheng@623 1091 $path =~ s/^<.*?>//;
dongsheng@623 1092 }
dongsheng@623 1093
dongsheng@623 1094 return 0;
dongsheng@623 1095 }
dongsheng@623 1096
dongsheng@623 1097 =head2 WORKING WITH ATTRIBUTES
dongsheng@623 1098
dongsheng@623 1099 =over 4
dongsheng@623 1100
dongsheng@623 1101 =item treat_attributes(@)
dongsheng@623 1102
dongsheng@623 1103 This function handles the translation of the tags' attributes. It receives the tag
dongsheng@623 1104 without the beginning / end marks, and then it finds the attributes, and it
dongsheng@623 1105 translates the translatable ones (specified by the module option "attributes").
dongsheng@623 1106 This returns a plain string with the translated tag.
dongsheng@623 1107
dongsheng@623 1108 =back
dongsheng@623 1109
dongsheng@623 1110 =cut
dongsheng@623 1111
dongsheng@623 1112 sub treat_attributes {
dongsheng@623 1113 my ($self,@tag)=@_;
dongsheng@623 1114
dongsheng@623 1115 $tag[0] =~ /^(\S*)(.*)/s;
dongsheng@623 1116 my $text = $1;
dongsheng@623 1117 $tag[0] = $2;
dongsheng@623 1118
dongsheng@623 1119 while (@tag) {
dongsheng@623 1120 my $complete = 1;
dongsheng@623 1121
dongsheng@623 1122 $text .= $self->skip_spaces(\@tag);
dongsheng@623 1123 if (@tag) {
dongsheng@623 1124 # Get the attribute's name
dongsheng@623 1125 $complete = 0;
dongsheng@623 1126
dongsheng@623 1127 $tag[0] =~ /^([^\s=]+)(.*)/s;
dongsheng@623 1128 my $name = $1;
dongsheng@623 1129 my $ref = $tag[1];
dongsheng@623 1130 $tag[0] = $2;
dongsheng@623 1131 $text .= $name;
dongsheng@623 1132 $text .= $self->skip_spaces(\@tag);
dongsheng@623 1133 if (@tag) {
dongsheng@623 1134 # Get the '='
dongsheng@623 1135 if ($tag[0] =~ /^=(.*)/s) {
dongsheng@623 1136 $tag[0] = $1;
dongsheng@623 1137 $text .= "=";
dongsheng@623 1138 $text .= $self->skip_spaces(\@tag);
dongsheng@623 1139 if (@tag) {
dongsheng@623 1140 # Get the value
dongsheng@623 1141 my $value="";
dongsheng@623 1142 $ref=$tag[1];
dongsheng@623 1143 my $quot=substr($tag[0],0,1);
dongsheng@623 1144 if ($quot ne "\"" and $quot ne "'") {
dongsheng@623 1145 # Unquoted value
dongsheng@623 1146 $quot="";
dongsheng@623 1147 $tag[0] =~ /^(\S+)(.*)/s;
dongsheng@623 1148 $value = $1;
dongsheng@623 1149 $tag[0] = $2;
dongsheng@623 1150 } else {
dongsheng@623 1151 # Quoted value
dongsheng@623 1152 $text .= $quot;
dongsheng@623 1153 $tag[0] =~ /^\Q$quot\E(.*)/s;
dongsheng@623 1154 $tag[0] = $1;
dongsheng@623 1155 while ($tag[0] !~ /\Q$quot\E/) {
dongsheng@623 1156 $value .= $tag[0];
dongsheng@623 1157 shift @tag;
dongsheng@623 1158 shift @tag;
dongsheng@623 1159 }
dongsheng@623 1160 $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s;
dongsheng@623 1161 $value .= $1;
dongsheng@623 1162 $tag[0] = $2;
dongsheng@623 1163 }
dongsheng@623 1164 $complete = 1;
dongsheng@623 1165 if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) {
dongsheng@623 1166 $text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name });
dongsheng@623 1167 } else {
dongsheng@623 1168 print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value)
dongsheng@623 1169 if $self->debug();
dongsheng@623 1170 $text .= $self->recode_skipped_text($value);
dongsheng@623 1171 }
dongsheng@623 1172 $text .= $quot;
dongsheng@623 1173 }
dongsheng@623 1174 }
dongsheng@623 1175 }
dongsheng@623 1176
dongsheng@623 1177 unless ($complete) {
dongsheng@623 1178 my $ontagerror = $self->{options}{'ontagerror'};
dongsheng@623 1179 if ($ontagerror eq "warn") {
dongsheng@623 1180 warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax. Continuing..."));
dongsheng@623 1181 } elsif ($ontagerror ne "silent") {
dongsheng@623 1182 die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax"));
dongsheng@623 1183 }
dongsheng@623 1184 }
dongsheng@623 1185 }
dongsheng@623 1186 }
dongsheng@623 1187 return $text;
dongsheng@623 1188 }
dongsheng@623 1189
dongsheng@623 1190 # Returns an empty string if the content in the $path should not be
dongsheng@623 1191 # translated.
dongsheng@623 1192 #
dongsheng@623 1193 # Otherwise, returns the set of options for translation:
dongsheng@623 1194 # w: the content shall be re-wrapped
dongsheng@623 1195 # W: the content shall not be re-wrapped
dongsheng@623 1196 # i: the tag shall be inlined
dongsheng@623 1197 # p: a placeholder shall replace the tag (and its content)
dongsheng@623 1198 #
dongsheng@623 1199 # A translatable inline tag in an untranslated tag is treated as a translatable breaking tag.
dongsheng@623 1200 my %translate_options_cache;
dongsheng@623 1201 sub get_translate_options {
dongsheng@623 1202 my $self = shift;
dongsheng@623 1203 my $path = shift;
dongsheng@623 1204
dongsheng@623 1205 if (defined $translate_options_cache{$path}) {
dongsheng@623 1206 return $translate_options_cache{$path};
dongsheng@623 1207 }
dongsheng@623 1208
dongsheng@623 1209 my $options = "";
dongsheng@623 1210 my $translate = 0;
dongsheng@623 1211 my $usedefault = 1;
dongsheng@623 1212
dongsheng@623 1213 my $inlist = 0;
dongsheng@623 1214 my $tag = $self->get_tag_from_list($path, $self->{tags});
dongsheng@623 1215 if (defined $tag) {
dongsheng@623 1216 $inlist = 1;
dongsheng@623 1217 }
dongsheng@623 1218 if ($self->{options}{'tagsonly'} eq $inlist) {
dongsheng@623 1219 $usedefault = 0;
dongsheng@623 1220 if (defined $tag) {
dongsheng@623 1221 $options = $tag;
dongsheng@623 1222 $options =~ s/<.*$//;
dongsheng@623 1223 } else {
dongsheng@623 1224 if ($self->{options}{'wrap'}) {
dongsheng@623 1225 $options = "w";
dongsheng@623 1226 } else {
dongsheng@623 1227 $options = "W";
dongsheng@623 1228 }
dongsheng@623 1229 }
dongsheng@623 1230 $translate = 1;
dongsheng@623 1231 }
dongsheng@623 1232
dongsheng@623 1233 # TODO: a less precise set of tags should not override a more precise one
dongsheng@623 1234 # The tags and tagsonly options are deprecated.
dongsheng@623 1235 # The translated and untranslated options have an higher priority.
dongsheng@623 1236 $tag = $self->get_tag_from_list($path, $self->{translated});
dongsheng@623 1237 if (defined $tag) {
dongsheng@623 1238 $usedefault = 0;
dongsheng@623 1239 $options = $tag;
dongsheng@623 1240 $options =~ s/<.*$//;
dongsheng@623 1241 $translate = 1;
dongsheng@623 1242 }
dongsheng@623 1243
dongsheng@623 1244 if ($translate and $options !~ m/w/i) {
dongsheng@623 1245 $options .= ($self->{options}{'wrap'})?"w":"W";
dongsheng@623 1246 }
dongsheng@623 1247
dongsheng@623 1248 if (not defined $tag) {
dongsheng@623 1249 $tag = $self->get_tag_from_list($path, $self->{untranslated});
dongsheng@623 1250 if (defined $tag) {
dongsheng@623 1251 $usedefault = 0;
dongsheng@623 1252 $options = "";
dongsheng@623 1253 $translate = 0;
dongsheng@623 1254 }
dongsheng@623 1255 }
dongsheng@623 1256
dongsheng@623 1257 $tag = $self->get_tag_from_list($path, $self->{inline});
dongsheng@623 1258 if (defined $tag) {
dongsheng@623 1259 $usedefault = 0;
dongsheng@623 1260 $options .= "i";
dongsheng@623 1261 } else {
dongsheng@623 1262 $tag = $self->get_tag_from_list($path, $self->{placeholder});
dongsheng@623 1263 if (defined $tag) {
dongsheng@623 1264 $usedefault = 0;
dongsheng@623 1265 $options .= "p";
dongsheng@623 1266 }
dongsheng@623 1267 }
dongsheng@623 1268
dongsheng@623 1269 if ($usedefault) {
dongsheng@623 1270 $options = $self->{options}{'defaulttranslateoption'};
dongsheng@623 1271 }
dongsheng@623 1272
dongsheng@623 1273 # A translatable inline tag in an untranslated tag is treated as a
dongsheng@623 1274 # translatable breaking tag.
dongsheng@623 1275 if ($options =~ m/i/) {
dongsheng@623 1276 my $ppath = $path;
dongsheng@623 1277 $ppath =~ s/<[^>]*>$//;
dongsheng@623 1278 my $poptions = $self->get_translate_options ($ppath);
dongsheng@623 1279 if ($poptions eq "") {
dongsheng@623 1280 $options =~ s/i//;
dongsheng@623 1281 }
dongsheng@623 1282 }
dongsheng@623 1283
dongsheng@623 1284 if ($options =~ m/i/ and $self->{options}{'foldattributes'}) {
dongsheng@623 1285 $options .= "f";
dongsheng@623 1286 }
dongsheng@623 1287
dongsheng@623 1288 $translate_options_cache{$path} = $options;
dongsheng@623 1289 return $options;
dongsheng@623 1290 }
dongsheng@623 1291
dongsheng@623 1292
dongsheng@623 1293 # Return the tag (or biggest set of tags) of a list which matches with the
dongsheng@623 1294 # given path.
dongsheng@623 1295 #
dongsheng@623 1296 # The tag (or set of tags) is returned with its options.
dongsheng@623 1297 #
dongsheng@623 1298 # If no tags could match the path, undef is returned.
dongsheng@623 1299 sub get_tag_from_list ($$$) {
dongsheng@623 1300 my ($self,$path,$list) = @_;
dongsheng@623 1301 if ($self->{options}{'caseinsensitive'}) {
dongsheng@623 1302 $path = lc $path;
dongsheng@623 1303 }
dongsheng@623 1304
dongsheng@623 1305 while (1) {
dongsheng@623 1306 if (defined $list->{$path}) {
dongsheng@623 1307 return $list->{$path}.$path;
dongsheng@623 1308 }
dongsheng@623 1309 last unless ($path =~ m/</);
dongsheng@623 1310 $path =~ s/^<.*?>//;
dongsheng@623 1311 }
dongsheng@623 1312
dongsheng@623 1313 return undef;
dongsheng@623 1314 }
dongsheng@623 1315
dongsheng@623 1316
dongsheng@623 1317
dongsheng@623 1318 sub treat_content {
dongsheng@623 1319 my $self = shift;
dongsheng@623 1320 my $blank="";
dongsheng@623 1321 # Indicates if the paragraph will have to be translated
dongsheng@623 1322 my $translate = "";
dongsheng@623 1323
dongsheng@623 1324 my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});
dongsheng@623 1325
dongsheng@623 1326 while (!$eof and !$self->breaking_tag) {
dongsheng@623 1327 NEXT_TAG:
dongsheng@623 1328 my @text;
dongsheng@623 1329 my $type = $self->tag_type;
dongsheng@623 1330 my $f_extract = $tag_types[$type]->{'f_extract'};
dongsheng@623 1331 if ( defined($f_extract)
dongsheng@623 1332 and $f_extract eq \&tag_extract_comment) {
dongsheng@623 1333 # Remove the content of the comments
dongsheng@623 1334 ($eof, @text) = $self->extract_tag($type,1);
dongsheng@623 1335 $text[$#text-1] .= "\0";
dongsheng@623 1336 if ($tag_types[$type]->{'beginning'} eq "!--#") {
dongsheng@623 1337 $text[0] = "#".$text[0];
dongsheng@623 1338 }
dongsheng@623 1339 push @comments, @text;
dongsheng@623 1340 } else {
dongsheng@623 1341 my ($tmpeof, @tag) = $self->extract_tag($type,0);
dongsheng@623 1342 # Append the found inline tag
dongsheng@623 1343 ($eof,@text)=$self->get_string_until('>',
dongsheng@623 1344 {include=>1,
dongsheng@623 1345 remove=>1,
dongsheng@623 1346 unquoted=>1});
dongsheng@623 1347 # Append or remove the opening/closing tag from
dongsheng@623 1348 # the tag path
dongsheng@623 1349 if ($tag_types[$type]->{'end'} eq "") {
dongsheng@623 1350 if ($tag_types[$type]->{'beginning'} eq "") {
dongsheng@623 1351 # Opening inline tag
dongsheng@623 1352 my $cur_tag_name = $self->get_tag_name(@tag);
dongsheng@623 1353 my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name));
dongsheng@623 1354 if ($t_opts =~ m/p/) {
dongsheng@623 1355 # We enter a new holder.
dongsheng@623 1356 # Append a <placeholder ...> tag to the current
dongsheng@623 1357 # paragraph, and save the @paragraph in the
dongsheng@623 1358 # current holder.
dongsheng@623 1359 my $last_holder = $save_holders[$#save_holders];
dongsheng@623 1360 my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>";
dongsheng@623 1361 push @paragraph, ($placeholder_str, $text[1]);
dongsheng@623 1362 my @saved_paragraph = @paragraph;
dongsheng@623 1363
dongsheng@623 1364 $last_holder->{'paragraph'} = \@saved_paragraph;
dongsheng@623 1365
dongsheng@623 1366 # Then we must push a new holder
dongsheng@623 1367 my @new_paragraph = ();
dongsheng@623 1368 my @sub_translations = ();
dongsheng@623 1369 my %folded_attributes;
dongsheng@623 1370 my %new_holder = ('paragraph' => \@new_paragraph,
dongsheng@623 1371 'open' => $text[0],
dongsheng@623 1372 'translation' => "",
dongsheng@623 1373 'close' => undef,
dongsheng@623 1374 'sub_translations' => \@sub_translations,
dongsheng@623 1375 'folded_attributes' => \%folded_attributes);
dongsheng@623 1376 push @save_holders, \%new_holder;
dongsheng@623 1377 @text = ();
dongsheng@623 1378
dongsheng@623 1379 # The current @paragraph
dongsheng@623 1380 # (for the current holder)
dongsheng@623 1381 # is empty.
dongsheng@623 1382 @paragraph = ();
dongsheng@623 1383 } elsif ($t_opts =~ m/f/) {
dongsheng@623 1384 my $tag_full = $self->join_lines(@text);
dongsheng@623 1385 my $tag_ref = $text[1];
dongsheng@623 1386 if ($tag_full =~ m/^<\s*\S+\s+\S.*>$/s) {
dongsheng@623 1387 my $holder = $save_holders[$#save_holders];
dongsheng@623 1388 my $id = 0;
dongsheng@623 1389 foreach (keys %{$holder->{folded_attributes}}) {
dongsheng@623 1390 $id = $_ + 1 if ($_ >= $id);
dongsheng@623 1391 }
dongsheng@623 1392 $holder->{folded_attributes}->{$id} = $tag_full;
dongsheng@623 1393
dongsheng@623 1394 @text = ("<$cur_tag_name po4a-id=$id>", $tag_ref);
dongsheng@623 1395 }
dongsheng@623 1396 }
dongsheng@623 1397 push @path, $cur_tag_name;
dongsheng@623 1398 } elsif ($tag_types[$type]->{'beginning'} eq "/") {
dongsheng@623 1399 # Closing inline tag
dongsheng@623 1400
dongsheng@623 1401 # Check if this is closing the
dongsheng@623 1402 # last opening tag we detected.
dongsheng@623 1403 my $test = pop @path;
dongsheng@623 1404 my $name = $self->get_tag_name(@tag);
dongsheng@623 1405 if (!defined($test) ||
dongsheng@623 1406 $test ne $name ) {
dongsheng@623 1407 my $ontagerror = $self->{options}{'ontagerror'};
dongsheng@623 1408 if ($ontagerror eq "warn") {
dongsheng@623 1409 warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);
dongsheng@623 1410 } elsif ($ontagerror ne "silent") {
dongsheng@623 1411 die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);
dongsheng@623 1412 }
dongsheng@623 1413 }
dongsheng@623 1414
dongsheng@623 1415 if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) {
dongsheng@623 1416 # This closes the current holder.
dongsheng@623 1417
dongsheng@623 1418 push @path, $self->get_tag_name(@tag);
dongsheng@623 1419 # Now translate this paragraph if needed.
dongsheng@623 1420 # This will call pushline and append the
dongsheng@623 1421 # translation to the current holder's translation.
dongsheng@623 1422 $self->translate_paragraph(@paragraph);
dongsheng@623 1423 pop @path;
dongsheng@623 1424
dongsheng@623 1425 # Now that this holder is closed, we can remove
dongsheng@623 1426 # the holder from the stack.
dongsheng@623 1427 my $holder = pop @save_holders;
dongsheng@623 1428 # We need to keep the translation of this holder
dongsheng@623 1429 my $translation = $holder->{'open'}.$holder->{'translation'}.$text[0];
dongsheng@623 1430 # FIXME: @text could be multilines.
dongsheng@623 1431
dongsheng@623 1432 @text = ();
dongsheng@623 1433
dongsheng@623 1434 # Then we store the translation in the previous
dongsheng@623 1435 # holder's sub_translations array
dongsheng@623 1436 my $previous_holder = $save_holders[$#save_holders];
dongsheng@623 1437 push @{$previous_holder->{'sub_translations'}}, $translation;
dongsheng@623 1438 # We also need to restore the @paragraph array, as
dongsheng@623 1439 # it was before we encountered the holder.
dongsheng@623 1440 @paragraph = @{$previous_holder->{'paragraph'}};
dongsheng@623 1441 }
dongsheng@623 1442 }
dongsheng@623 1443 }
dongsheng@623 1444 push @paragraph, @text;
dongsheng@623 1445 }
dongsheng@623 1446
dongsheng@623 1447 # Next tag
dongsheng@623 1448 ($eof,@text)=$self->get_string_until('<',{remove=>1});
dongsheng@623 1449 if ($#text > 0) {
dongsheng@623 1450 # Check if text (extracted after the inline tag)
dongsheng@623 1451 # has to be translated
dongsheng@623 1452 push @paragraph, @text;
dongsheng@623 1453 }
dongsheng@623 1454 }
dongsheng@623 1455
dongsheng@623 1456 # This strips the extracted strings
dongsheng@623 1457 # (only if you don't specify the 'nostrip' option, and if the
dongsheng@623 1458 # paragraph can be re-wrapped)
dongsheng@623 1459 $translate = $self->get_translate_options($self->get_path);
dongsheng@623 1460 if (!$self->{options}{'nostrip'} and $translate !~ m/W/) {
dongsheng@623 1461 my $clean = 0;
dongsheng@623 1462 # Clean the beginning
dongsheng@623 1463 while (!$clean and $#paragraph > 0) {
dongsheng@623 1464 $paragraph[0] =~ /^(\s*)(.*)/s;
dongsheng@623 1465 my $match = $1;
dongsheng@623 1466 if ($paragraph[0] eq $match) {
dongsheng@623 1467 if ($match ne "") {
dongsheng@623 1468 $self->pushline($match);
dongsheng@623 1469 }
dongsheng@623 1470 shift @paragraph;
dongsheng@623 1471 shift @paragraph;
dongsheng@623 1472 } else {
dongsheng@623 1473 $paragraph[0] = $2;
dongsheng@623 1474 if ($match ne "") {
dongsheng@623 1475 $self->pushline($match);
dongsheng@623 1476 }
dongsheng@623 1477 $clean = 1;
dongsheng@623 1478 }
dongsheng@623 1479 }
dongsheng@623 1480 $clean = 0;
dongsheng@623 1481 # Clean the end
dongsheng@623 1482 while (!$clean and $#paragraph > 0) {
dongsheng@623 1483 $paragraph[$#paragraph-1] =~ /^(.*?)(\s*)$/s;
dongsheng@623 1484 my $match = $2;
dongsheng@623 1485 if ($paragraph[$#paragraph-1] eq $match) {
dongsheng@623 1486 if ($match ne "") {
dongsheng@623 1487 $blank = $match.$blank;
dongsheng@623 1488 }
dongsheng@623 1489 pop @paragraph;
dongsheng@623 1490 pop @paragraph;
dongsheng@623 1491 } else {
dongsheng@623 1492 $paragraph[$#paragraph-1] = $1;
dongsheng@623 1493 if ($match ne "") {
dongsheng@623 1494 $blank = $match.$blank;
dongsheng@623 1495 }
dongsheng@623 1496 $clean = 1;
dongsheng@623 1497 }
dongsheng@623 1498 }
dongsheng@623 1499 }
dongsheng@623 1500
dongsheng@623 1501 # Translate the string when needed
dongsheng@623 1502 # This will either push the translation in the translated document or
dongsheng@623 1503 # in the current holder translation.
dongsheng@623 1504 $self->translate_paragraph(@paragraph);
dongsheng@623 1505
dongsheng@623 1506 # Push the trailing blanks
dongsheng@623 1507 if ($blank ne "") {
dongsheng@623 1508 $self->pushline($blank);
dongsheng@623 1509 }
dongsheng@623 1510 return $eof;
dongsheng@623 1511 }
dongsheng@623 1512
dongsheng@623 1513 # Translate a @paragraph array of (string, reference).
dongsheng@623 1514 # The $translate argument indicates if the strings must be translated or
dongsheng@623 1515 # just pushed
dongsheng@623 1516 sub translate_paragraph {
dongsheng@623 1517 my $self = shift;
dongsheng@623 1518 my @paragraph = @_;
dongsheng@623 1519 my $translate = $self->get_translate_options($self->get_path);
dongsheng@623 1520
dongsheng@623 1521 while ( (scalar @paragraph)
dongsheng@623 1522 and ($paragraph[0] =~ m/^\s*\n/s)) {
dongsheng@623 1523 $self->pushline($paragraph[0]);
dongsheng@623 1524 shift @paragraph;
dongsheng@623 1525 shift @paragraph;
dongsheng@623 1526 }
dongsheng@623 1527
dongsheng@623 1528 my $comments;
dongsheng@623 1529 while (@comments) {
dongsheng@623 1530 my ($comment,$eoc);
dongsheng@623 1531 do {
dongsheng@623 1532 my ($t,$l) = (shift @comments, shift @comments);
dongsheng@623 1533 $t =~ s/\n?(\0)?$//;
dongsheng@623 1534 $eoc = $1;
dongsheng@623 1535 $comment .= "\n" if defined $comment;
dongsheng@623 1536 $comment .= $t;
dongsheng@623 1537 } until ($eoc);
dongsheng@623 1538 $comments .= "\n" if defined $comments;
dongsheng@623 1539 $comments .= $comment;
dongsheng@623 1540 $self->pushline("<!--".$comment."-->\n") if defined $comment;
dongsheng@623 1541 }
dongsheng@623 1542 @comments = ();
dongsheng@623 1543
dongsheng@623 1544 if ($self->{options}{'cpp'}) {
dongsheng@623 1545 my @tmp = @paragraph;
dongsheng@623 1546 @paragraph = ();
dongsheng@623 1547 while (@tmp) {
dongsheng@623 1548 my ($t,$l) = (shift @tmp, shift @tmp);
dongsheng@623 1549 # #include can be followed by a filename between
dongsheng@623 1550 # <> brackets. In that case, the argument won't be
dongsheng@623 1551 # handled in the same call to translate_paragraph.
dongsheng@623 1552 # Thus do not try to match "include ".
dongsheng@623 1553 if ($t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si) {
dongsheng@623 1554 if (@paragraph) {
dongsheng@623 1555 $self->translate_paragraph(@paragraph);
dongsheng@623 1556 @paragraph = ();
dongsheng@623 1557 $self->pushline("\n");
dongsheng@623 1558 }
dongsheng@623 1559 $self->pushline($t);
dongsheng@623 1560 } else {
dongsheng@623 1561 push @paragraph, ($t,$l);
dongsheng@623 1562 }
dongsheng@623 1563 }
dongsheng@623 1564 }
dongsheng@623 1565
dongsheng@623 1566 my $para = $self->join_lines(@paragraph);
dongsheng@623 1567 if ( length($para) > 0 ) {
dongsheng@623 1568 if ($translate ne "") {
dongsheng@623 1569 # This tag should be translated
dongsheng@623 1570 $self->pushline($self->found_string(
dongsheng@623 1571 $para,
dongsheng@623 1572 $paragraph[1], {
dongsheng@623 1573 type=>"tag",
dongsheng@623 1574 tag_options=>$translate,
dongsheng@623 1575 comments=>$comments
dongsheng@623 1576 }));
dongsheng@623 1577 } else {
dongsheng@623 1578 # Inform that this tag isn't translated in debug mode
dongsheng@623 1579 print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para)
dongsheng@623 1580 if $self->debug();
dongsheng@623 1581 $self->pushline($self->recode_skipped_text($para));
dongsheng@623 1582 }
dongsheng@623 1583 }
dongsheng@623 1584 # Now the paragraph is fully translated.
dongsheng@623 1585 # If we have all the holders' translation, we can replace the
dongsheng@623 1586 # placeholders by their translations.
dongsheng@623 1587 # We must wait to have all the translations because the holders are
dongsheng@623 1588 # numbered.
dongsheng@623 1589 {
dongsheng@623 1590 my $holder = $save_holders[$#save_holders];
dongsheng@623 1591 my $translation = $holder->{'translation'};
dongsheng@623 1592
dongsheng@623 1593 # Count the number of <placeholder ...> in $translation
dongsheng@623 1594 my $count = 0;
dongsheng@623 1595 my $str = $translation;
dongsheng@623 1596 while ( (defined $str)
dongsheng@623 1597 and ($str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s)) {
dongsheng@623 1598 $count += 1;
dongsheng@623 1599 $str = $2;
dongsheng@623 1600 if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) {
dongsheng@623 1601 $count = -1;
dongsheng@623 1602 last;
dongsheng@623 1603 }
dongsheng@623 1604 }
dongsheng@623 1605
dongsheng@623 1606 if ( (defined $translation)
dongsheng@623 1607 and (scalar(@{$holder->{'sub_translations'}}) == $count)) {
dongsheng@623 1608 # OK, all the holders of the current paragraph are
dongsheng@623 1609 # closed (and translated).
dongsheng@623 1610 # Replace them by their translation.
dongsheng@623 1611 while ($translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s) {
dongsheng@623 1612 # FIXME: we could also check that
dongsheng@623 1613 # * the holder exists
dongsheng@623 1614 # * all the holders are used
dongsheng@623 1615 $translation = $1.$holder->{'sub_translations'}->[$2].$3;
dongsheng@623 1616 }
dongsheng@623 1617 # We have our translation
dongsheng@623 1618 $holder->{'translation'} = $translation;
dongsheng@623 1619 # And there is no need for any holder in it.
dongsheng@623 1620 my @sub_translations = ();
dongsheng@623 1621 $holder->{'sub_translations'} = \@sub_translations;
dongsheng@623 1622 }
dongsheng@623 1623 }
dongsheng@623 1624
dongsheng@623 1625 }
dongsheng@623 1626
dongsheng@623 1627
dongsheng@623 1628
dongsheng@623 1629 =head2 WORKING WITH THE MODULE OPTIONS
dongsheng@623 1630
dongsheng@623 1631 =over 4
dongsheng@623 1632
dongsheng@623 1633 =item treat_options()
dongsheng@623 1634
dongsheng@623 1635 This function fills the internal structures that contain the tags, attributes
dongsheng@623 1636 and inline data with the options of the module (specified in the command-line
dongsheng@623 1637 or in the initialize function).
dongsheng@623 1638
dongsheng@623 1639 =back
dongsheng@623 1640
dongsheng@623 1641 =cut
dongsheng@623 1642
dongsheng@623 1643 sub treat_options {
dongsheng@623 1644 my $self = shift;
dongsheng@623 1645
dongsheng@623 1646 if ($self->{options}{'caseinsensitive'}) {
dongsheng@623 1647 $self->{options}{'nodefault'} = lc $self->{options}{'nodefault'};
dongsheng@623 1648 $self->{options}{'tags'} = lc $self->{options}{'tags'};
dongsheng@623 1649 $self->{options}{'break'} = lc $self->{options}{'break'};
dongsheng@623 1650 $self->{options}{'_default_break'} = lc $self->{options}{'_default_break'};
dongsheng@623 1651 $self->{options}{'translated'} = lc $self->{options}{'translated'};
dongsheng@623 1652 $self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'};
dongsheng@623 1653 $self->{options}{'untranslated'} = lc $self->{options}{'untranslated'};
dongsheng@623 1654 $self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'};
dongsheng@623 1655 $self->{options}{'attributes'} = lc $self->{options}{'attributes'};
dongsheng@623 1656 $self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'};
dongsheng@623 1657 $self->{options}{'inline'} = lc $self->{options}{'inline'};
dongsheng@623 1658 $self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'};
dongsheng@623 1659 $self->{options}{'placeholder'} = lc $self->{options}{'placeholder'};
dongsheng@623 1660 $self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'};
dongsheng@623 1661 }
dongsheng@623 1662
dongsheng@623 1663 $self->{options}{'nodefault'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1664 my %list_nodefault;
dongsheng@623 1665 foreach (split(/\s+/s,$1)) {
dongsheng@623 1666 $list_nodefault{$_} = 1;
dongsheng@623 1667 }
dongsheng@623 1668 $self->{nodefault} = \%list_nodefault;
dongsheng@623 1669
dongsheng@623 1670 $self->{options}{'tags'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1671 if (length $self->{options}{'tags'}) {
dongsheng@623 1672 warn wrap_mod("po4a::xml",
dongsheng@623 1673 dgettext("po4a",
dongsheng@623 1674 "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags");
dongsheng@623 1675 }
dongsheng@623 1676 foreach (split(/\s+/s,$1)) {
dongsheng@623 1677 $_ =~ m/^(.*?)(<.*)$/;
dongsheng@623 1678 $self->{tags}->{$2} = $1 || "";
dongsheng@623 1679 }
dongsheng@623 1680
dongsheng@623 1681 if ($self->{options}{'tagsonly'}) {
dongsheng@623 1682 warn wrap_mod("po4a::xml",
dongsheng@623 1683 dgettext("po4a",
dongsheng@623 1684 "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly");
dongsheng@623 1685 }
dongsheng@623 1686
dongsheng@623 1687 $self->{options}{'break'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1688 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1689 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1690 $self->{break}->{$2} = $1 || "";
dongsheng@623 1691 }
dongsheng@623 1692 $self->{options}{'_default_break'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1693 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1694 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1695 $self->{break}->{$2} = $1 || ""
dongsheng@623 1696 unless $list_nodefault{$2}
dongsheng@623 1697 or defined $self->{break}->{$2};
dongsheng@623 1698 }
dongsheng@623 1699
dongsheng@623 1700 $self->{options}{'translated'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1701 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1702 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1703 $self->{translated}->{$2} = $1 || "";
dongsheng@623 1704 }
dongsheng@623 1705 $self->{options}{'_default_translated'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1706 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1707 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1708 $self->{translated}->{$2} = $1 || ""
dongsheng@623 1709 unless $list_nodefault{$2}
dongsheng@623 1710 or defined $self->{translated}->{$2};
dongsheng@623 1711 }
dongsheng@623 1712
dongsheng@623 1713 $self->{options}{'untranslated'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1714 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1715 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1716 $self->{untranslated}->{$2} = $1 || "";
dongsheng@623 1717 }
dongsheng@623 1718 $self->{options}{'_default_untranslated'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1719 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1720 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1721 $self->{untranslated}->{$2} = $1 || ""
dongsheng@623 1722 unless $list_nodefault{$2}
dongsheng@623 1723 or defined $self->{untranslated}->{$2};
dongsheng@623 1724 }
dongsheng@623 1725
dongsheng@623 1726 $self->{options}{'attributes'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1727 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1728 if ($tag =~ m/^(.*?)(<.*)$/) {
dongsheng@623 1729 $self->{attributes}->{$2} = $1 || "";
dongsheng@623 1730 } else {
dongsheng@623 1731 $self->{attributes}->{$tag} = "";
dongsheng@623 1732 }
dongsheng@623 1733 }
dongsheng@623 1734 $self->{options}{'_default_attributes'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1735 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1736 if ($tag =~ m/^(.*?)(<.*)$/) {
dongsheng@623 1737 $self->{attributes}->{$2} = $1 || ""
dongsheng@623 1738 unless $list_nodefault{$2}
dongsheng@623 1739 or defined $self->{attributes}->{$2};
dongsheng@623 1740 } else {
dongsheng@623 1741 $self->{attributes}->{$tag} = ""
dongsheng@623 1742 unless $list_nodefault{$tag}
dongsheng@623 1743 or defined $self->{attributes}->{$tag};
dongsheng@623 1744 }
dongsheng@623 1745 }
dongsheng@623 1746
dongsheng@623 1747 my @list_inline;
dongsheng@623 1748 $self->{options}{'inline'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1749 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1750 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1751 $self->{inline}->{$2} = $1 || "";
dongsheng@623 1752 }
dongsheng@623 1753 $self->{options}{'_default_inline'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1754 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1755 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1756 $self->{inline}->{$2} = $1 || ""
dongsheng@623 1757 unless $list_nodefault{$2}
dongsheng@623 1758 or defined $self->{inline}->{$2};
dongsheng@623 1759 }
dongsheng@623 1760
dongsheng@623 1761 $self->{options}{'placeholder'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1762 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1763 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1764 $self->{placeholder}->{$2} = $1 || "";
dongsheng@623 1765 }
dongsheng@623 1766 $self->{options}{'_default_placeholder'} =~ /^\s*(.*)\s*$/s;
dongsheng@623 1767 foreach my $tag (split(/\s+/s,$1)) {
dongsheng@623 1768 $tag =~ m/^(.*?)(<.*)$/;
dongsheng@623 1769 $self->{placeholder}->{$2} = $1 || ""
dongsheng@623 1770 unless $list_nodefault{$2}
dongsheng@623 1771 or defined $self->{placeholder}->{$2};
dongsheng@623 1772 }
dongsheng@623 1773
dongsheng@623 1774 # There should be no translated and untranslated tags
dongsheng@623 1775 foreach my $tag (keys %{$self->{translated}}) {
dongsheng@623 1776 die wrap_mod("po4a::xml",
dongsheng@623 1777 dgettext("po4a",
dongsheng@623 1778 "Tag '%s' both in the %s and %s categories."), $tag, "translated", "untranslated")
dongsheng@623 1779 if defined $self->{untranslated}->{$tag};
dongsheng@623 1780 }
dongsheng@623 1781 # There should be no inline, break, and placeholder tags
dongsheng@623 1782 foreach my $tag (keys %{$self->{inline}}) {
dongsheng@623 1783 die wrap_mod("po4a::xml",
dongsheng@623 1784 dgettext("po4a",
dongsheng@623 1785 "Tag '%s' both in the %s and %s categories."), $tag, "inline", "break")
dongsheng@623 1786 if defined $self->{break}->{$tag};
dongsheng@623 1787 die wrap_mod("po4a::xml",
dongsheng@623 1788 dgettext("po4a",
dongsheng@623 1789 "Tag '%s' both in the %s and %s categories."), $tag, "inline", "placeholder")
dongsheng@623 1790 if defined $self->{placeholder}->{$tag};
dongsheng@623 1791 }
dongsheng@623 1792 foreach my $tag (keys %{$self->{break}}) {
dongsheng@623 1793 die wrap_mod("po4a::xml",
dongsheng@623 1794 dgettext("po4a",
dongsheng@623 1795 "Tag '%s' both in the %s and %s categories."), $tag, "break", "placeholder")
dongsheng@623 1796 if defined $self->{placeholder}->{$tag};
dongsheng@623 1797 }
dongsheng@623 1798 }
dongsheng@623 1799
dongsheng@623 1800 =head2 GETTING TEXT FROM THE INPUT DOCUMENT
dongsheng@623 1801
dongsheng@623 1802 =over
dongsheng@623 1803
dongsheng@623 1804 =item get_string_until($%)
dongsheng@623 1805
dongsheng@623 1806 This function returns an array with the lines (and references) from the input
dongsheng@623 1807 document until it finds the first argument. The second argument is an options
dongsheng@623 1808 hash. Value 0 means disabled (the default) and 1, enabled.
dongsheng@623 1809
dongsheng@623 1810 The valid options are:
dongsheng@623 1811
dongsheng@623 1812 =over 4
dongsheng@623 1813
dongsheng@623 1814 =item include
dongsheng@623 1815
dongsheng@623 1816 This makes the returned array to contain the searched text
dongsheng@623 1817
dongsheng@623 1818 =item remove
dongsheng@623 1819
dongsheng@623 1820 This removes the returned stream from the input
dongsheng@623 1821
dongsheng@623 1822 =item unquoted
dongsheng@623 1823
dongsheng@623 1824 This ensures that the searched text is outside any quotes
dongsheng@623 1825
dongsheng@623 1826 =back
dongsheng@623 1827
dongsheng@623 1828 =cut
dongsheng@623 1829
dongsheng@623 1830 sub get_string_until {
dongsheng@623 1831 my ($self,$search) = (shift,shift);
dongsheng@623 1832 my $options = shift;
dongsheng@623 1833 my ($include,$remove,$unquoted, $regex) = (0,0,0,0);
dongsheng@623 1834
dongsheng@623 1835 if (defined($options->{include})) { $include = $options->{include}; }
dongsheng@623 1836 if (defined($options->{remove})) { $remove = $options->{remove}; }
dongsheng@623 1837 if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }
dongsheng@623 1838 if (defined($options->{regex})) { $regex = $options->{regex}; }
dongsheng@623 1839
dongsheng@623 1840 my ($line,$ref) = $self->shiftline();
dongsheng@623 1841 my (@text,$paragraph);
dongsheng@623 1842 my ($eof,$found) = (0,0);
dongsheng@623 1843
dongsheng@623 1844 $search = "\Q$search\E" unless $regex;
dongsheng@623 1845 while (defined($line) and !$found) {
dongsheng@623 1846 push @text, ($line,$ref);
dongsheng@623 1847 $paragraph .= $line;
dongsheng@623 1848 if ($unquoted) {
dongsheng@623 1849 if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) {
dongsheng@623 1850 $found = 1;
dongsheng@623 1851 }
dongsheng@623 1852 } else {
dongsheng@623 1853 if ( $paragraph =~ /$search/s ) {
dongsheng@623 1854 $found = 1;
dongsheng@623 1855 }
dongsheng@623 1856 }
dongsheng@623 1857 if (!$found) {
dongsheng@623 1858 ($line,$ref)=$self->shiftline();
dongsheng@623 1859 }
dongsheng@623 1860 }
dongsheng@623 1861
dongsheng@623 1862 if (!defined($line)) { $eof = 1; }
dongsheng@623 1863
dongsheng@623 1864 if ( $found ) {
dongsheng@623 1865 $line = "";
dongsheng@623 1866 if($unquoted) {
dongsheng@623 1867 $paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s;
dongsheng@623 1868 $line = $1;
dongsheng@623 1869 $text[$#text-1] =~ s/\Q$line\E$//s;
dongsheng@623 1870 } else {
dongsheng@623 1871 $paragraph =~ /$search(.*)$/s;
dongsheng@623 1872 $line = $1;
dongsheng@623 1873 $text[$#text-1] =~ s/\Q$line\E$//s;
dongsheng@623 1874 }
dongsheng@623 1875 if(!$include) {
dongsheng@623 1876 $text[$#text-1] =~ /^(.*)($search.*)$/s;
dongsheng@623 1877 $text[$#text-1] = $1;
dongsheng@623 1878 $line = $2.$line;
dongsheng@623 1879 }
dongsheng@623 1880 if (defined($line) and ($line ne "")) {
dongsheng@623 1881 $self->unshiftline ($line,$text[$#text]);
dongsheng@623 1882 }
dongsheng@623 1883 }
dongsheng@623 1884 if (!$remove) {
dongsheng@623 1885 $self->unshiftline (@text);
dongsheng@623 1886 }
dongsheng@623 1887
dongsheng@623 1888 #If we get to the end of the file, we return the whole paragraph
dongsheng@623 1889 return ($eof,@text);
dongsheng@623 1890 }
dongsheng@623 1891
dongsheng@623 1892 =item skip_spaces(\@)
dongsheng@623 1893
dongsheng@623 1894 This function receives as argument the reference to a paragraph (in the format
dongsheng@623 1895 returned by get_string_until), skips his heading spaces and returns them as
dongsheng@623 1896 a simple string.
dongsheng@623 1897
dongsheng@623 1898 =cut
dongsheng@623 1899
dongsheng@623 1900 sub skip_spaces {
dongsheng@623 1901 my ($self,$pstring)=@_;
dongsheng@623 1902 my $space="";
dongsheng@623 1903
dongsheng@623 1904 while (@$pstring and (@$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "")) {
dongsheng@623 1905 if (@$pstring[0] ne "") {
dongsheng@623 1906 $space .= $1;
dongsheng@623 1907 @$pstring[0] = $2;
dongsheng@623 1908 }
dongsheng@623 1909
dongsheng@623 1910 if (@$pstring[0] eq "") {
dongsheng@623 1911 shift @$pstring;
dongsheng@623 1912 shift @$pstring;
dongsheng@623 1913 }
dongsheng@623 1914 }
dongsheng@623 1915 return $space;
dongsheng@623 1916 }
dongsheng@623 1917
dongsheng@623 1918 =item join_lines(@)
dongsheng@623 1919
dongsheng@623 1920 This function returns a simple string with the text from the argument array
dongsheng@623 1921 (discarding the references).
dongsheng@623 1922
dongsheng@623 1923 =cut
dongsheng@623 1924
dongsheng@623 1925 sub join_lines {
dongsheng@623 1926 my ($self,@lines)=@_;
dongsheng@623 1927 my ($line,$ref);
dongsheng@623 1928 my $text = "";
dongsheng@623 1929 while ($#lines > 0) {
dongsheng@623 1930 ($line,$ref) = (shift @lines,shift @lines);
dongsheng@623 1931 $text .= $line;
dongsheng@623 1932 }
dongsheng@623 1933 return $text;
dongsheng@623 1934 }
dongsheng@623 1935
dongsheng@623 1936 =back
dongsheng@623 1937
dongsheng@623 1938 =head1 STATUS OF THIS MODULE
dongsheng@623 1939
dongsheng@623 1940 This module can translate tags and attributes.
dongsheng@623 1941
dongsheng@623 1942 =head1 TODO LIST
dongsheng@623 1943
dongsheng@623 1944 DOCTYPE (ENTITIES)
dongsheng@623 1945
dongsheng@623 1946 There is a minimal support for the translation of entities. They are
dongsheng@623 1947 translated as a whole, and tags are not taken into account. Multilines
dongsheng@623 1948 entities are not supported and entities are always rewrapped during the
dongsheng@623 1949 translation.
dongsheng@623 1950
dongsheng@623 1951 MODIFY TAG TYPES FROM INHERITED MODULES
dongsheng@623 1952 (move the tag_types structure inside the $self hash?)
dongsheng@623 1953
dongsheng@623 1954 =head1 SEE ALSO
dongsheng@623 1955
dongsheng@623 1956 L<po4a(7)|po4a.7>, L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>.
dongsheng@623 1957
dongsheng@623 1958 =head1 AUTHORS
dongsheng@623 1959
dongsheng@623 1960 Jordi Vilalta <jvprat@gmail.com>
dongsheng@623 1961 Nicolas François <nicolas.francois@centraliens.net>
dongsheng@623 1962
dongsheng@623 1963 =head1 COPYRIGHT AND LICENSE
dongsheng@623 1964
dongsheng@623 1965 Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com>
dongsheng@623 1966 Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>
dongsheng@623 1967
dongsheng@623 1968 This program is free software; you may redistribute it and/or modify it
dongsheng@623 1969 under the terms of GPL (see the COPYING file).
dongsheng@623 1970
dongsheng@623 1971 =cut
dongsheng@623 1972
dongsheng@623 1973 1;