hgbook

view tools/po4a/lib/Locale/Po4a/Po.pm @ 627:8271c8891b0e

Add Chinese translation
author Dongsheng Song <dongsheng.song@gmail.com>
date Thu Mar 12 15:53:01 2009 +0800 (2009-03-12)
parents
children
line source
1 # Locale::Po4a::Po -- manipulation of po files
2 # $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $
3 #
4 # This program is free software; you may redistribute it and/or modify it
5 # under the terms of GPL (see COPYING).
7 ############################################################################
8 # Modules and declarations
9 ############################################################################
11 =head1 NAME
13 Locale::Po4a::Po - po file manipulation module
15 =head1 SYNOPSIS
17 use Locale::Po4a::Po;
18 my $pofile=Locale::Po4a::Po->new();
20 # Read po file
21 $pofile->read('file.po');
23 # Add an entry
24 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
25 'flags' => "wrap", 'reference'=>'file.c:46');
27 # Extract a translation
28 $pofile->gettext("Hello"); # returns 'bonjour'
30 # Write back to a file
31 $pofile->write('otherfile.po');
33 =head1 DESCRIPTION
35 Locale::Po4a::Po is a module that allows you to manipulate message
36 catalogs. You can load and write from/to a file (which extension is often
37 I<po>), you can build new entries on the fly or request for the translation
38 of a string.
40 For a more complete description of message catalogs in the po format and
41 their use, please refer to the documentation of the gettext program.
43 This module is part of the PO4A project, which objective is to use po files
44 (designed at origin to ease the translation of program messages) to
45 translate everything, including documentation (man page, info manual),
46 package description, debconf templates, and everything which may benefit
47 from this.
49 =head1 OPTIONS ACCEPTED BY THIS MODULE
51 =over 4
53 =item porefs
55 This specifies the reference format. It can be one of 'none' to not produce
56 any reference, 'noline' to not specify the line number, and 'full' to
57 include complete references.
59 =back
61 =cut
63 use IO::File;
66 require Exporter;
68 package Locale::Po4a::Po;
69 use DynaLoader;
71 use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
73 use subs qw(makespace);
74 use vars qw(@ISA @EXPORT_OK);
75 @ISA = qw(Exporter DynaLoader);
76 @EXPORT = qw(%debug);
77 @EXPORT_OK = qw(&move_po_if_needed);
79 use Locale::Po4a::TransTractor;
80 # Try to use a C extension if present.
81 eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
83 use 5.006;
84 use strict;
85 use warnings;
87 use Carp qw(croak);
88 use File::Path; # mkdir before write
89 use File::Copy; # move
90 use POSIX qw(strftime floor);
91 use Time::Local;
93 use Encode;
95 my @known_flags=qw(wrap no-wrap c-format fuzzy);
97 our %debug=('canonize' => 0,
98 'quote' => 0,
99 'escape' => 0,
100 'encoding' => 0,
101 'filter' => 0);
103 =head1 Functions about whole message catalogs
105 =over 4
107 =item new()
109 Creates a new message catalog. If an argument is provided, it's the name of
110 a po file we should load.
112 =cut
114 sub new {
115 my ($this, $options) = (shift, shift);
116 my $class = ref($this) || $this;
117 my $self = {};
118 bless $self, $class;
119 $self->initialize($options);
121 my $filename = shift;
122 $self->read($filename) if defined($filename) && length($filename);
123 return $self;
124 }
126 # Return the numerical timezone (e.g. +0200)
127 # Neither the %z nor the %s formats of strftime are portable:
128 # '%s' is not supported on Solaris and '%z' indicates
129 # "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
130 sub timezone {
131 my @g = gmtime();
132 my @l = localtime();
134 my $diff;
135 $diff = floor(timelocal(@l)/60 +0.5);
136 $diff -= floor(timelocal(@g)/60 +0.5);
138 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently
139 # in a daylight saving time zone
140 my $m = $diff%60;
142 return sprintf "%+03d%02d\n", $h, $m;
143 }
145 sub initialize {
146 my ($self, $options) = (shift, shift);
147 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
148 chomp $date;
149 # $options = ref($options) || $options;
151 $self->{options}{'porefs'}= 'full';
152 $self->{options}{'msgid-bugs-address'}= undef;
153 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
154 $self->{options}{'package-name'}= "PACKAGE";
155 $self->{options}{'package-version'}= "VERSION";
156 foreach my $opt (keys %$options) {
157 if ($options->{$opt}) {
158 die wrap_mod("po4a::po",
159 dgettext ("po4a", "Unknown option: %s"), $opt)
160 unless exists $self->{options}{$opt};
161 $self->{options}{$opt} = $options->{$opt};
162 }
163 }
164 $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||
165 die wrap_mod("po4a::po",
166 dgettext ("po4a",
167 "Invalid value for option 'porefs' ('%s' is ".
168 "not one of 'full', 'noline' or 'none')"),
169 $self->{options}{'porefs'});
171 $self->{po}=();
172 $self->{count}=0; # number of msgids in the PO
173 # count_doc: number of strings in the document
174 # (duplicate strings counted multiple times)
175 $self->{count_doc}=0;
176 $self->{header_comment}=
177 " SOME DESCRIPTIVE TITLE\n"
178 ." Copyright (C) YEAR ".
179 $self->{options}{'copyright-holder'}."\n"
180 ." This file is distributed under the same license ".
181 "as the ".$self->{options}{'package-name'}." package.\n"
182 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
183 ."\n"
184 .", fuzzy";
185 # $self->header_tag="fuzzy";
186 $self->{header}=escape_text("Project-Id-Version: ".
187 $self->{options}{'package-name'}." ".
188 $self->{options}{'package-version'}."\n".
189 ((defined $self->{options}{'msgid-bugs-address'})?
190 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
191 "").
192 "POT-Creation-Date: $date\n".
193 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
194 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
195 "Language-Team: LANGUAGE <LL\@li.org>\n".
196 "MIME-Version: 1.0\n".
197 "Content-Type: text/plain; charset=CHARSET\n".
198 "Content-Transfer-Encoding: ENCODING");
200 $self->{encoder}=find_encoding("ascii");
202 # To make stats about gettext hits
203 $self->stats_clear();
204 }
206 =item read($)
208 Reads a po file (which name is given as argument). Previously existing
209 entries in self are not removed, the new ones are added to the end of the
210 catalog.
212 =cut
214 sub read {
215 my $self=shift;
216 my $filename=shift
217 or croak wrap_mod("po4a::po",
218 dgettext("po4a",
219 "Please provide a non-null filename"));
221 my $fh;
222 if ($filename eq '-') {
223 $fh=*STDIN;
224 } else {
225 open $fh,"<$filename"
226 or croak wrap_mod("po4a::po",
227 dgettext("po4a", "Can't read from %s: %s"),
228 $filename, $!);
229 }
231 ## Read paragraphs line-by-line
232 my $pofile="";
233 my $textline;
234 while (defined ($textline = <$fh>)) {
235 $pofile .= $textline;
236 }
237 # close INPUT
238 # or croak (sprintf(dgettext("po4a",
239 # "Can't close %s after reading: %s"),
240 # $filename,$!)."\n");
242 my $linenum=0;
244 foreach my $msg (split (/\n\n/,$pofile)) {
245 my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer);
246 my ($msgid_plural, $msgstr_plural);
247 foreach my $line (split (/\n/,$msg)) {
248 $linenum++;
249 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment
250 $automatic .= (defined($automatic) ? "\n" : "").$1;
252 } elsif ($line =~ /^#: ?(.*)$/) { # reference
253 $reference .= (defined($reference) ? "\n" : "").$1;
255 } elsif ($line =~ /^#, ?(.*)$/) { # flags
256 $flags .= (defined($flags) ? "\n" : "").$1;
258 } elsif ($line =~ /^#(.*)$/) { # Translator comments
259 $comment .= (defined($comment) ? "\n" : "").($1||"");
261 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
262 $buffer = $1;
264 } elsif ($line =~ /^msgid_plural (".*")$/) {
265 # begin of msgid_plural, end of msgid
267 $msgid = $buffer;
268 $buffer = $1;
270 } elsif ($line =~ /^msgstr (".*")$/) {
271 # begin of msgstr, end of msgid
273 $msgid = $buffer;
274 $buffer = "$1";
276 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
277 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
279 # Note: po4a cannot uses plural forms
280 # (no integer to use the plural form)
281 # * drop the msgstr[x] where x >= 2
282 # * use msgstr[0] as the translation of msgid
283 # * use msgstr[1] as the translation of msgid_plural
285 if ($1 eq "0") {
286 $msgid_plural = $buffer;
287 $buffer = "$2";
288 } elsif ($1 eq "1") {
289 $msgstr = $buffer;
290 $buffer = "$2";
291 } elsif ($1 eq "2") {
292 $msgstr_plural = $buffer;
293 warn wrap_ref_mod("$filename:$linenum",
294 "po4a::po",
295 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
296 }
297 } elsif ($line =~ /^(".*")$/) {
298 # continuation of a line
299 $buffer .= "\n$1";
301 } else {
302 warn wrap_ref_mod("$filename:$linenum",
303 "po4a::po",
304 dgettext("po4a", "Strange line: -->%s<--"),
305 $line);
306 }
307 }
308 $linenum++;
309 if (defined $msgid_plural) {
310 $msgstr_plural=$buffer;
312 $msgid = unquote_text($msgid) if (defined($msgid));
313 $msgstr = unquote_text($msgstr) if (defined($msgstr));
315 $self->push_raw ('msgid' => $msgid,
316 'msgstr' => $msgstr,
317 'reference' => $reference,
318 'flags' => $flags,
319 'comment' => $comment,
320 'automatic' => $automatic,
321 'plural' => 0);
323 $msgid_plural = unquote_text($msgid_plural)
324 if (defined($msgid_plural));
325 $msgstr_plural = unquote_text($msgstr_plural)
326 if (defined($msgstr_plural));
328 $self->push_raw ('msgid' => $msgid_plural,
329 'msgstr' => $msgstr_plural,
330 'reference' => $reference,
331 'flags' => $flags,
332 'comment' => $comment,
333 'automatic' => $automatic,
334 'plural' => 1);
335 } else {
336 $msgstr=$buffer;
338 $msgid = unquote_text($msgid) if (defined($msgid));
339 $msgstr = unquote_text($msgstr) if (defined($msgstr));
341 $self->push_raw ('msgid' => $msgid,
342 'msgstr' => $msgstr,
343 'reference' => $reference,
344 'flags' => $flags,
345 'comment' => $comment,
346 'automatic' => $automatic);
347 }
348 }
349 }
351 =item write($)
353 Writes the current catalog to the given file.
355 =cut
357 sub write{
358 my $self=shift;
359 my $filename=shift
360 or croak dgettext("po4a","Can't write to a file without filename")."\n";
362 my $fh;
363 if ($filename eq '-') {
364 $fh=\*STDOUT;
365 } else {
366 # make sure the directory in which we should write the localized
367 # file exists
368 my $dir = $filename;
369 if ($dir =~ m|/|) {
370 $dir =~ s|/[^/]*$||;
372 File::Path::mkpath($dir, 0, 0755) # Croaks on error
373 if (length ($dir) && ! -e $dir);
374 }
375 open $fh,">$filename"
376 or croak wrap_mod("po4a::po",
377 dgettext("po4a", "Can't write to %s: %s"),
378 $filename, $!);
379 }
381 print $fh "".format_comment($self->{header_comment},"")
382 if defined($self->{header_comment}) && length($self->{header_comment});
384 print $fh "msgid \"\"\n";
385 print $fh "msgstr ".quote_text($self->{header})."\n\n";
388 my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms
389 my $first=1;
390 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
391 ($self->{po}{"$b"}{'pos'})
392 } keys %{$self->{po}}) {
393 my $output="";
395 if ($first) {
396 $first=0;
397 } else {
398 $output .= "\n";
399 }
401 $output .= format_comment($self->{po}{$msgid}{'comment'},"")
402 if defined($self->{po}{$msgid}{'comment'})
403 && length ($self->{po}{$msgid}{'comment'});
404 if ( defined($self->{po}{$msgid}{'automatic'})
405 && length ($self->{po}{$msgid}{'automatic'})) {
406 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
407 {
408 $output .= format_comment($comment, ". ")
409 }
410 }
411 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
412 if defined($self->{po}{$msgid}{'type'})
413 && length ($self->{po}{$msgid}{'type'});
414 $output .= format_comment($self->{po}{$msgid}{'reference'},": ")
415 if defined($self->{po}{$msgid}{'reference'})
416 && length ($self->{po}{$msgid}{'reference'});
417 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
418 if defined($self->{po}{$msgid}{'flags'})
419 && length ($self->{po}{$msgid}{'flags'});
421 if (exists $self->{po}{$msgid}{'plural'}) {
422 if ($self->{po}{$msgid}{'plural'} == 0) {
423 if ($self->get_charset =~ /^utf-8$/i) {
424 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
425 $msgid = Encode::decode_utf8($msgid);
426 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
427 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
428 } else {
429 $output = "msgid ".quote_text($msgid)."\n";
430 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
431 }
432 } elsif ($self->{po}{$msgid}{'plural'} == 1) {
433 # TODO: there may be only one plural form
434 if ($self->get_charset =~ /^utf-8$/i) {
435 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
436 $msgid = Encode::decode_utf8($msgid);
437 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
438 $output .= $buf_msgstr_plural;
439 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
440 $buf_msgstr_plural = "";
441 } else {
442 $output = "msgid_plural ".quote_text($msgid)."\n";
443 $output .= $buf_msgstr_plural;
444 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
445 }
446 } else {
447 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
448 }
449 } else {
450 if ($self->get_charset =~ /^utf-8$/i) {
451 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
452 $msgid = Encode::decode_utf8($msgid);
453 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
454 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
455 } else {
456 $output .= "msgid ".quote_text($msgid)."\n";
457 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
458 }
459 }
461 print $fh $output;
462 }
463 # print STDERR "$fh";
464 # if ($filename ne '-') {
465 # close $fh
466 # or croak (sprintf(dgettext("po4a",
467 # "Can't close %s after writing: %s\n"),
468 # $filename,$!));
469 # }
470 }
472 =item write_if_needed($$)
474 Like write, but if the PO or POT file already exists, the object will be
475 written in a temporary file which will be compared with the existing file
476 to check that the update is needed (this avoids to change a POT just to
477 update a line reference or the POT-Creation-Date field).
479 =cut
481 sub move_po_if_needed {
482 my ($new_po, $old_po, $backup) = (shift, shift, shift);
483 my $diff;
485 if (-e $old_po) {
486 my $diff_ignore = "-I'^#:' "
487 ."-I'^\"POT-Creation-Date:' "
488 ."-I'^\"PO-Revision-Date:'";
489 $diff = qx(diff -q $diff_ignore $old_po $new_po);
490 if ( $diff eq "" ) {
491 unlink $new_po
492 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
493 $new_po, $!);
494 # touch the old PO
495 my ($atime, $mtime) = (time,time);
496 utime $atime, $mtime, $old_po;
497 } else {
498 if ($backup) {
499 copy $old_po, $old_po."~"
500 or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."),
501 $old_po, $old_po."~", $!);
502 } else {
503 }
504 move $new_po, $old_po
505 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
506 $new_po, $old_po, $!);
507 }
508 } else {
509 move $new_po, $old_po
510 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
511 $new_po, $old_po, $!);
512 }
513 }
515 sub write_if_needed {
516 my $self=shift;
517 my $filename=shift
518 or croak dgettext("po4a","Can't write to a file without filename")."\n";
520 if (-e $filename) {
521 my ($tmp_filename);
522 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",
523 DIR => "/tmp",
524 OPEN => 0,
525 UNLINK => 0);
526 $self->write($tmp_filename);
527 move_po_if_needed($tmp_filename, $filename);
528 } else {
529 $self->write($filename);
530 }
531 }
533 =item gettextize($$)
535 This function produces one translated message catalog from two catalogs, an
536 original and a translation. This process is described in L<po4a(7)|po4a.7>,
537 section I<Gettextization: how does it work?>.
539 =cut
541 sub gettextize {
542 my $this = shift;
543 my $class = ref($this) || $this;
544 my ($poorig,$potrans)=(shift,shift);
546 my $pores=Locale::Po4a::Po->new();
548 my $please_fail = 0;
549 my $toobad = dgettext("po4a",
550 "\nThe gettextization failed (once again). Don't give up, ".
551 "gettextizing is a subtle art, but this is only needed once ".
552 "to convert a project to the gorgeous luxus offered by po4a ".
553 "to translators.".
554 "\nPlease refer to the po4a(7) documentation, the section ".
555 "\"HOWTO convert a pre-existing translation to po4a?\" ".
556 "contains several hints to help you in your task");
558 # Don't fail right now when the entry count does not match. Instead, give
559 # it a try so that the user can see where we fail (which is probably where
560 # the problem is).
561 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
562 warn wrap_mod("po4a gettextize", dgettext("po4a",
563 "Original has more strings than the translation (%d>%d). ".
564 "Please fix it by editing the translated version to add ".
565 "some dummy entry."),
566 $poorig->count_entries_doc(),
567 $potrans->count_entries_doc());
568 $please_fail = 1;
569 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
570 warn wrap_mod("po4a gettextize", dgettext("po4a",
571 "Original has less strings than the translation (%d<%d). ".
572 "Please fix it by removing the extra entry from the ".
573 "translated file. You may need an addendum (cf po4a(7)) ".
574 "to reput the chunk in place after gettextization. A ".
575 "possible cause is that a text duplicated in the original ".
576 "is not translated the same way each time. Remove one of ".
577 "the translations, and you're fine."),
578 $poorig->count_entries_doc(),
579 $potrans->count_entries_doc());
580 $please_fail = 1;
581 }
583 if ( $poorig->get_charset =~ /^utf-8$/i ) {
584 $potrans->to_utf8;
585 $pores->set_charset("utf-8");
586 } else {
587 if ($potrans->get_charset eq "CHARSET") {
588 $pores->set_charset("ascii");
589 } else {
590 $pores->set_charset($potrans->get_charset);
591 }
592 }
593 print "Po character sets:\n".
594 " original=".$poorig->get_charset."\n".
595 " translated=".$potrans->get_charset."\n".
596 " result=".$pores->get_charset."\n"
597 if $debug{'encoding'};
599 for (my ($o,$t)=(0,0) ;
600 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
601 $o++,$t++) {
602 #
603 # Extract some informations
605 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
606 # print STDERR "Matches [[$orig]]<<$trans>>\n";
608 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
609 $potrans->{po}{$trans}{'reference'});
610 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
611 $potrans->{po}{$trans}{'type'});
613 #
614 # Make sure the type of both string exist
615 #
616 die wrap_mod("po4a gettextize",
617 "Internal error: type of original string number %s ".
618 "isn't provided", $o)
619 if ($typeorig eq '');
621 die wrap_mod("po4a gettextize",
622 "Internal error: type of translated string number %s ".
623 "isn't provided", $o)
624 if ($typetrans eq '');
626 #
627 # Make sure both type are the same
628 #
629 if ($typeorig ne $typetrans){
630 $pores->write("gettextization.failed.po");
631 die wrap_msg(dgettext("po4a",
632 "po4a gettextization: Structure disparity between ".
633 "original and translated files:\n".
634 "msgid (at %s) is of type '%s' while\n".
635 "msgstr (at %s) is of type '%s'.\n".
636 "Original text: %s\n".
637 "Translated text: %s\n".
638 "(result so far dumped to gettextization.failed.po)").
639 "%s",
640 $reforig, $typeorig,
641 $reftrans, $typetrans,
642 $orig,
643 $trans,
644 $toobad);
645 }
647 #
648 # Push the entry
649 #
650 my $flags;
651 if (defined $poorig->{po}{$orig}{'flags'}) {
652 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
653 } else {
654 $flags = "fuzzy";
655 }
656 $pores->push_raw('msgid' => $orig,
657 'msgstr' => $trans,
658 'flags' => $flags,
659 'type' => $typeorig,
660 'reference' => $reforig,
661 'conflict' => 1,
662 'transref' => $potrans->{po}{$trans}{'reference'})
663 unless (defined($pores->{po}{$orig})
664 and ($pores->{po}{$orig}{'msgstr'} eq $trans))
665 # FIXME: maybe we should be smarter about what reference should be
666 # sent to push_raw.
667 }
669 # make sure we return a useful error message when entry count differ
670 die "$toobad\n" if $please_fail;
672 return $pores;
673 }
675 =item filter($)
677 This function extracts a catalog from an existing one. Only the entries having
678 a reference in the given file will be placed in the resulting catalog.
680 This function parses its argument, converts it to a perl function definition,
681 eval this definition and filter the fields for which this function returns
682 true.
684 I love perl sometimes ;)
686 =cut
688 sub filter {
689 my $self=shift;
690 our $filter=shift;
692 my $res;
693 $res = Locale::Po4a::Po->new();
695 # Parse the filter
696 our $code="sub apply { return ";
697 our $pos=0;
698 our $length = length $filter;
700 # explode chars to parts. How to subscript a string in Perl?
701 our @filter = split(//,$filter);
703 sub gloups {
704 my $fmt=shift;
705 my $space = "";
706 for (1..$pos){
707 $space .= ' ';
708 }
709 die wrap_msg("$fmt\n$filter\n$space^ HERE");
710 }
711 sub showmethecode {
712 return unless $debug{'filter'};
713 my $fmt=shift;
714 my $space="";
715 for (1..$pos){
716 $space .= ' ';
717 }
718 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
719 }
721 # I dream of a lex in perl :-/
722 sub parse_expression {
723 showmethecode("Begin expression")
724 if $debug{'filter'};
726 gloups("Begin of expression expected, got '%s'",$filter[$pos])
727 unless ($filter[$pos] eq '(');
728 $pos ++; # pass the '('
729 if ($filter[$pos] eq '&') {
730 # AND
731 $pos++;
732 showmethecode("Begin of AND")
733 if $debug{'filter'};
734 $code .= "(";
735 while (1) {
736 gloups ("Unfinished AND statement.")
737 if ($pos == $length);
738 parse_expression();
739 if ($filter[$pos] eq '(') {
740 $code .= " && ";
741 } elsif ($filter[$pos] eq ')') {
742 last; # do not eat that char
743 } else {
744 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
745 }
746 }
747 $code .= ")";
748 } elsif ($filter[$pos] eq '|') {
749 # OR
750 $pos++;
751 $code .= "(";
752 while (1) {
753 gloups("Unfinished OR statement.")
754 if ($pos == $length);
755 parse_expression();
756 if ($filter[$pos] eq '(') {
757 $code .= " || ";
758 } elsif ($filter[$pos] eq ')') {
759 last; # do not eat that char
760 } else {
761 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
762 }
763 }
764 $code .= ")";
765 } elsif ($filter[$pos] eq '!') {
766 # NOT
767 $pos++;
768 $code .= "(!";
769 gloups("Missing sub-expression in NOT statement.")
770 if ($pos == $length);
771 parse_expression();
772 $code .= ")";
773 } else {
774 # must be an equal. Let's get field and argument
775 my ($field,$arg,$done);
776 $field = substr($filter,$pos);
777 gloups("EQ statement contains no '=' or invalid field name")
778 unless ($field =~ /([a-z]*)=/i);
779 $field = lc($1);
780 $pos += (length $field) + 1;
782 # check that we've got a valid field name,
783 # and the number it referes to
784 # DO NOT CHANGE THE ORDER
785 my @names=qw(msgid msgstr reference flags comment automatic);
786 my $fieldpos;
787 for ($fieldpos = 0;
788 $fieldpos < scalar @names && $field ne $names[$fieldpos];
789 $fieldpos++) {}
790 gloups("Invalid field name: %s",$field)
791 if $fieldpos == scalar @names; # not found
793 # Now, get the argument value. It has to be between quotes,
794 # which can be escaped
795 # We point right on the first char of the argument
796 # (first quote already eaten)
797 my $escaped = 0;
798 my $quoted = 0;
799 if ($filter[$pos] eq '"') {
800 $pos++;
801 $quoted = 1;
802 }
803 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
804 if $debug{'filter'};
806 while (!$done) {
807 gloups("Unfinished EQ argument.")
808 if ($pos == $length);
810 if ($quoted) {
811 if ($filter[$pos] eq '\\') {
812 if ($escaped) {
813 $arg .= '\\';
814 $escaped = 0;
815 } else {
816 $escaped = 1;
817 }
818 } elsif ($escaped) {
819 if ($filter[$pos] eq '"') {
820 $arg .= '"';
821 $escaped = 0;
822 } else {
823 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
824 }
825 } else {
826 if ($filter[$pos] eq '"') {
827 $done = 1;
828 } else {
829 $arg .= $filter[$pos];
830 }
831 }
832 } else {
833 if ($filter[$pos] eq ')') {
834 # counter the next ++ since we don't want to eat
835 # this char
836 $pos--;
837 $done = 1;
838 } else {
839 $arg .= $filter[$pos];
840 }
841 }
842 $pos++;
843 }
844 # and now, add the code to check this equality
845 $code .= "(\$_[$fieldpos] =~ m/$arg/)";
847 }
848 showmethecode("End of expression")
849 if $debug{'filter'};
850 gloups("Unfinished statement.")
851 if ($pos == $length);
852 gloups("End of expression expected, got '%s'",$filter[$pos])
853 unless ($filter[$pos] eq ')');
854 $pos++;
855 }
856 # And now, launch the beast, finish the function and use eval
857 # to construct this function.
858 # Ok, the lack of lexer is a fair price for the eval ;)
859 parse_expression();
860 gloups("Garbage at the end of the expression")
861 if ($pos != $length);
862 $code .= "; }";
863 print STDERR "CODE = $code\n"
864 if $debug{'filter'};
865 eval $code;
866 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
867 if $@;
869 for (my $cpt=(0) ;
870 $cpt<$self->count_entries();
871 $cpt++) {
873 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic);
875 $msgid = $self->msgid($cpt);
876 $ref=$self->{po}{$msgid}{'reference'};
878 $msgstr= $self->{po}{$msgid}{'msgstr'};
879 $flags = $self->{po}{$msgid}{'flags'};
880 $type = $self->{po}{$msgid}{'type'};
881 $comment = $self->{po}{$msgid}{'comment'};
882 $automatic = $self->{po}{$msgid}{'automatic'};
884 # DO NOT CHANGE THE ORDER
885 $res->push_raw('msgid' => $msgid,
886 'msgstr' => $msgstr,
887 'flags' => $flags,
888 'type' => $type,
889 'reference' => $ref,
890 'comment' => $comment,
891 'automatic' => $automatic)
892 if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic));
893 }
894 # delete the apply subroutine
895 # otherwise it will be redefined.
896 undef &apply;
897 return $res;
898 }
900 =item to_utf8()
902 Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not
903 specified in the po file ("CHARSET" value), or if it's already utf-8 or
904 ascii.
906 =cut
908 sub to_utf8 {
909 my $this = shift;
910 my $charset = $this->get_charset();
912 unless ($charset eq "CHARSET" or
913 $charset =~ /^ascii$/i or
914 $charset =~ /^utf-8$/i) {
915 foreach my $msgid ( keys %{$this->{po}} ) {
916 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
917 }
918 $this->set_charset("utf-8");
919 }
920 }
922 =back
924 =head1 Functions to use a message catalog for translations
926 =over 4
928 =item gettext($%)
930 Request the translation of the string given as argument in the current catalog.
931 The function returns the original (untranslated) string if the string was not
932 found.
934 After the string to translate, you can pass a hash of extra
935 arguments. Here are the valid entries:
937 =over
939 =item wrap
941 boolean indicating whether we can consider that whitespaces in string are
942 not important. If yes, the function canonizes the string before looking for
943 a translation, and wraps the result.
945 =item wrapcol
947 The column at which we should wrap (default: 76).
949 =back
951 =cut
953 sub gettext {
954 my $self=shift;
955 my $text=shift;
956 my (%opt)=@_;
957 my $res;
959 return "" unless defined($text) && length($text); # Avoid returning the header.
960 my $validoption="reference wrap wrapcol";
961 my %validoption;
963 map { $validoption{$_}=1 } (split(/ /,$validoption));
964 foreach (keys %opt) {
965 Carp::confess "internal error: unknown arg $_.\n".
966 "Here are the valid options: $validoption.\n"
967 unless $validoption{$_};
968 }
970 $text=canonize($text)
971 if ($opt{'wrap'});
973 my $esc_text=escape_text($text);
975 $self->{gettextqueries}++;
977 if ( defined $self->{po}{$esc_text}
978 and defined $self->{po}{$esc_text}{'msgstr'}
979 and length $self->{po}{$esc_text}{'msgstr'}
980 and ( not defined $self->{po}{$esc_text}{'flags'}
981 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
983 $self->{gettexthits}++;
984 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
985 if (defined $self->{po}{$esc_text}{'plural'}) {
986 if ($self->{po}{$esc_text}{'plural'} eq "0") {
987 warn wrap_mod("po4a gettextize", dgettext("po4a",
988 "'%s' is the singular form of a message, ".
989 "po4a will use the msgstr[0] translation (%s)."),
990 $esc_text, $res);
991 } else {
992 warn wrap_mod("po4a gettextize", dgettext("po4a",
993 "'%s' is the plural form of a message, ".
994 "po4a will use the msgstr[1] translation (%s)."),
995 $esc_text, $res);
996 }
997 }
998 } else {
999 $res = $text;
1002 if ($opt{'wrap'}) {
1003 if ($self->get_charset =~ /^utf-8$/i) {
1004 $res=Encode::decode_utf8($res);
1005 $res=wrap ($res, $opt{'wrapcol'} || 76);
1006 $res=Encode::encode_utf8($res);
1007 } else {
1008 $res=wrap ($res, $opt{'wrapcol'} || 76);
1011 # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
1012 return $res;
1015 =item stats_get()
1017 Returns statistics about the hit ratio of gettext since the last time that
1018 stats_clear() was called. Please note that it's not the same
1019 statistics than the one printed by msgfmt --statistic. Here, it's statistics
1020 about recent usage of the po file, while msgfmt reports the status of the
1021 file. Example of use:
1023 [some use of the po file to translate stuff]
1025 ($percent,$hit,$queries) = $pofile->stats_get();
1026 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
1028 =cut
1030 sub stats_get() {
1031 my $self=shift;
1032 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
1033 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
1035 # $p =~ s/\.00//;
1036 # $p =~ s/(\..)0/$1/;
1038 return ( $p,$h,$q );
1041 =item stats_clear()
1043 Clears the statistics about gettext hits.
1045 =cut
1047 sub stats_clear {
1048 my $self = shift;
1049 $self->{gettextqueries} = 0;
1050 $self->{gettexthits} = 0;
1053 =back
1055 =head1 Functions to build a message catalog
1057 =over 4
1059 =item push(%)
1061 Push a new entry at the end of the current catalog. The arguments should
1062 form a hash table. The valid keys are:
1064 =over 4
1066 =item msgid
1068 the string in original language.
1070 =item msgstr
1072 the translation.
1074 =item reference
1076 an indication of where this string was found. Example: file.c:46 (meaning
1077 in 'file.c' at line 46). It can be a space-separated list in case of
1078 multiple occurrences.
1080 =item comment
1082 a comment added here manually (by the translators). The format here is free.
1084 =item automatic
1086 a comment which was automatically added by the string extraction
1087 program. See the I<--add-comments> option of the B<xgettext> program for
1088 more information.
1090 =item flags
1092 space-separated list of all defined flags for this entry.
1094 Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text,
1095 smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text,
1096 tcl-text, wrap, no-wrap and fuzzy.
1098 See the gettext documentation for their meaning.
1100 =item type
1102 This is mostly an internal argument: it is used while gettextizing
1103 documents. The idea here is to parse both the original and the translation
1104 into a po object, and merge them, using one's msgid as msgid and the
1105 other's msgid as msgstr. To make sure that things get ok, each msgid in po
1106 objects are given a type, based on their structure (like "chapt", "sect1",
1107 "p" and so on in docbook). If the types of strings are not the same, that
1108 means that both files do not share the same structure, and the process
1109 reports an error.
1111 This information is written as automatic comment in the po file since this
1112 gives to translators some context about the strings to translate.
1114 =item wrap
1116 boolean indicating whether whitespaces can be mangled in cosmetic
1117 reformattings. If true, the string is canonized before use.
1119 This information is written to the po file using the 'wrap' or 'no-wrap' flag.
1121 =item wrapcol
1123 The column at which we should wrap (default: 76).
1125 This information is not written to the po file.
1127 =back
1129 =cut
1131 sub push {
1132 my $self=shift;
1133 my %entry=@_;
1135 my $validoption="wrap wrapcol type msgid msgstr automatic flags reference";
1136 my %validoption;
1138 map { $validoption{$_}=1 } (split(/ /,$validoption));
1139 foreach (keys %entry) {
1140 Carp::confess "internal error: unknown arg $_.\n".
1141 "Here are the valid options: $validoption.\n"
1142 unless $validoption{$_};
1145 unless ($entry{'wrap'}) {
1146 $entry{'flags'} .= " no-wrap";
1148 if (defined ($entry{'msgid'})) {
1149 $entry{'msgid'} = canonize($entry{'msgid'})
1150 if ($entry{'wrap'});
1152 $entry{'msgid'} = escape_text($entry{'msgid'});
1154 if (defined ($entry{'msgstr'})) {
1155 $entry{'msgstr'} = canonize($entry{'msgstr'})
1156 if ($entry{'wrap'});
1158 $entry{'msgstr'} = escape_text($entry{'msgstr'});
1161 $self->push_raw(%entry);
1164 # The same as push(), but assuming that msgid and msgstr are already escaped
1165 sub push_raw {
1166 my $self=shift;
1167 my %entry=@_;
1168 my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)=
1169 ($entry{'msgid'},$entry{'msgstr'},
1170 $entry{'reference'},$entry{'comment'},$entry{'automatic'},
1171 $entry{'flags'},$entry{'type'},$entry{'transref'});
1172 my $keep_conflict = $entry{'conflict'};
1174 # print STDERR "Push_raw\n";
1175 # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
1176 # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
1177 # Carp::cluck " flags=$flags\n" if $flags;
1179 return unless defined($entry{'msgid'});
1181 #no msgid => header definition
1182 unless (length($entry{'msgid'})) {
1183 # if (defined($self->{header}) && $self->{header} =~ /\S/) {
1184 # warn dgettext("po4a","Redefinition of the header. ".
1185 # "The old one will be discarded\n");
1186 # } FIXME: do that iff the header isn't the default one.
1187 $self->{header}=$msgstr;
1188 $self->{header_comment}=$comment;
1189 my $charset = $self->get_charset;
1190 if ($charset ne "CHARSET") {
1191 $self->{encoder}=find_encoding($charset);
1192 } else {
1193 $self->{encoder}=find_encoding("ascii");
1195 return;
1198 if ($self->{options}{'porefs'} eq "none") {
1199 $reference = "";
1200 } elsif ($self->{options}{'porefs'} eq "noline") {
1201 $reference =~ s/:[0-9]*/:1/g;
1204 if (defined($self->{po}{$msgid})) {
1205 warn wrap_mod("po4a::po",
1206 dgettext("po4a","msgid defined twice: %s"),
1207 $msgid)
1208 if (0); # FIXME: put a verbose stuff
1209 if ( defined $msgstr
1210 and defined $self->{po}{$msgid}{'msgstr'}
1211 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
1212 my $txt=quote_text($msgid);
1213 my ($first,$second)=
1214 (format_comment(". ",$self->{po}{$msgid}{'reference'}).
1215 quote_text($self->{po}{$msgid}{'msgstr'}),
1217 format_comment(". ",$reference).
1218 quote_text($msgstr));
1220 if ($keep_conflict) {
1221 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
1222 $msgstr = $self->{po}{$msgid}{'msgstr'}.
1223 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
1224 $msgstr;
1225 } else {
1226 $msgstr = "#-#-#-#-# ".
1227 $self->{po}{$msgid}{'transref'}.
1228 " #-#-#-#-#\\n".
1229 $self->{po}{$msgid}{'msgstr'}."\\n".
1230 "#-#-#-#-# $transref #-#-#-#-#\\n".
1231 $msgstr;
1233 # Every msgid will have the same list of references.
1234 # Only keep the last list.
1235 $self->{po}{$msgid}{'reference'} = "";
1236 } else {
1237 warn wrap_msg(dgettext("po4a",
1238 "Translations don't match for:\n".
1239 "%s\n".
1240 "-->First translation:\n".
1241 "%s\n".
1242 " Second translation:\n".
1243 "%s\n".
1244 " Old translation discarded."),
1245 $txt,$first,$second);
1249 if (defined $transref) {
1250 $self->{po}{$msgid}{'transref'} = $transref;
1252 if (defined $reference) {
1253 if (defined $self->{po}{$msgid}{'reference'}) {
1254 $self->{po}{$msgid}{'reference'} .= " ".$reference;
1255 } else {
1256 $self->{po}{$msgid}{'reference'} = $reference;
1259 $self->{po}{$msgid}{'msgstr'} = $msgstr;
1260 $self->{po}{$msgid}{'comment'} = $comment;
1261 $self->{po}{$msgid}{'automatic'} = $automatic;
1262 if (defined($self->{po}{$msgid}{'pos_doc'})) {
1263 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
1264 } else {
1265 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
1267 unless (defined($self->{po}{$msgid}{'pos'})) {
1268 $self->{po}{$msgid}{'pos'} = $self->{count}++;
1270 $self->{po}{$msgid}{'type'} = $type;
1271 $self->{po}{$msgid}{'plural'} = $entry{'plural'}
1272 if defined $entry{'plural'};
1274 if (defined($flags)) {
1275 $flags = " $flags ";
1276 $flags =~ s/,/ /g;
1277 foreach my $flag (@known_flags) {
1278 if ($flags =~ /\s$flag\s/) { # if flag to be set
1279 unless ( defined($self->{po}{$msgid}{'flags'})
1280 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
1281 # flag not already set
1282 if (defined $self->{po}{$msgid}{'flags'}) {
1283 $self->{po}{$msgid}{'flags'} .= " ".$flag;
1284 } else {
1285 $self->{po}{$msgid}{'flags'} = $flag;
1291 # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
1295 =back
1297 =head1 Miscellaneous functions
1299 =over 4
1301 =item count_entries()
1303 Returns the number of entries in the catalog (without the header).
1305 =cut
1307 sub count_entries($) {
1308 my $self=shift;
1309 return $self->{count};
1312 =item count_entries_doc()
1314 Returns the number of entries in document. If a string appears multiple times
1315 in the document, it will be counted multiple times
1317 =cut
1319 sub count_entries_doc($) {
1320 my $self=shift;
1321 return $self->{count_doc};
1324 =item msgid($)
1326 Returns the msgid of the given number.
1328 =cut
1330 sub msgid($$) {
1331 my $self=shift;
1332 my $num=shift;
1334 foreach my $msgid ( keys %{$self->{po}} ) {
1335 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
1337 return undef;
1340 =item msgid_doc($)
1342 Returns the msgid with the given position in the document.
1344 =cut
1346 sub msgid_doc($$) {
1347 my $self=shift;
1348 my $num=shift;
1350 foreach my $msgid ( keys %{$self->{po}} ) {
1351 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
1352 return $msgid if ($pos eq $num);
1355 return undef;
1358 =item get_charset()
1360 Returns the character set specified in the po header. If it hasn't been
1361 set, it will return "CHARSET".
1363 =cut
1365 sub get_charset() {
1366 my $self=shift;
1368 $self->{header} =~ /charset=(.*?)[\s\\]/;
1370 if (defined $1) {
1371 return $1;
1372 } else {
1373 return "CHARSET";
1377 =item set_charset($)
1379 This sets the character set of the po header to the value specified in its
1380 first argument. If you never call this function (and no file with a specified
1381 character set is read), the default value is left to "CHARSET". This value
1382 doesn't change the behavior of this module, it's just used to fill that field
1383 in the header, and to return it in get_charset().
1385 =cut
1387 sub set_charset() {
1388 my $self=shift;
1390 my ($newchar,$oldchar);
1391 $newchar = shift;
1392 $oldchar = $self->get_charset();
1394 $self->{header} =~ s/$oldchar/$newchar/;
1395 $self->{encoder}=find_encoding($newchar);
1398 #----[ helper functions ]---------------------------------------------------
1400 # transforme the string from its po file representation to the form which
1401 # should be used to print it
1402 sub unescape_text {
1403 my $text = shift;
1405 print STDERR "\nunescape [$text]====" if $debug{'escape'};
1406 $text = join("",split(/\n/,$text));
1407 $text =~ s/\\"/"/g;
1408 # unescape newlines
1409 # NOTE on \G:
1410 # The following regular expression introduce newlines.
1411 # Thus, ^ doesn't match all beginnings of lines.
1412 # \G is a zero-width assertion that matches the position
1413 # of the previous substitution with s///g. As every
1414 # substitution ends by a newline, it always matches a
1415 # position just after a newline.
1416 $text =~ s/( # $1:
1417 (\G|[^\\]) # beginning of the line or any char
1418 # different from '\'
1419 (\\\\)* # followed by any even number of '\'
1420 )\\n # and followed by an escaped newline
1421 /$1\n/sgx; # single string, match globally, allow comments
1422 # unescape tabulations
1423 $text =~ s/( # $1:
1424 (\G|[^\\])# beginning of the line or any char
1425 # different from '\'
1426 (\\\\)* # followed by any even number of '\'
1427 )\\t # and followed by an escaped tabulation
1428 /$1\t/mgx; # multilines string, match globally, allow comments
1429 # and unescape the escape character
1430 $text =~ s/\\\\/\\/g;
1431 print STDERR ">$text<\n" if $debug{'escape'};
1433 return $text;
1436 # transform the string to its representation as it should be written in po
1437 # files
1438 sub escape_text {
1439 my $text = shift;
1441 print STDERR "\nescape [$text]====" if $debug{'escape'};
1442 $text =~ s/\\/\\\\/g;
1443 $text =~ s/"/\\"/g;
1444 $text =~ s/\n/\\n/g;
1445 $text =~ s/\t/\\t/g;
1446 print STDERR ">$text<\n" if $debug{'escape'};
1448 return $text;
1451 # put quotes around the string on each lines (without escaping it)
1452 # It does also normalize the text (ie, make sure its representation is wraped
1453 # on the 80th char, but without changing the meaning of the string)
1454 sub quote_text {
1455 my $string = shift;
1457 return '""' unless defined($string) && length($string);
1459 print STDERR "\nquote [$string]====" if $debug{'quote'};
1460 # break lines on newlines, if any
1461 # see unescape_text for an explanation on \G
1462 $string =~ s/( # $1:
1463 (\G|[^\\]) # beginning of the line or any char
1464 # different from '\'
1465 (\\\\)* # followed by any even number of '\'
1466 \\n) # and followed by an escaped newline
1467 /$1\n/sgx; # single string, match globally, allow comments
1468 $string = wrap($string);
1469 my @string = split(/\n/,$string);
1470 $string = join ("\"\n\"",@string);
1471 $string = "\"$string\"";
1472 if (scalar @string > 1 && $string[0] ne '') {
1473 $string = "\"\"\n".$string;
1476 print STDERR ">$string<\n" if $debug{'quote'};
1477 return $string;
1480 # undo the work of the quote_text function
1481 sub unquote_text {
1482 my $string = shift;
1483 print STDERR "\nunquote [$string]====" if $debug{'quote'};
1484 $string =~ s/^""\\n//s;
1485 $string =~ s/^"(.*)"$/$1/s;
1486 $string =~ s/"\n"//gm;
1487 # Note: an even number of '\' could precede \\n, but I could not build a
1488 # document to test this
1489 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
1490 $string =~ s|!!DUMMYPOPM!!|\\n|gm;
1491 print STDERR ">$string<\n" if $debug{'quote'};
1492 return $string;
1495 # canonize the string: write it on only one line, changing consecutive
1496 # whitespace to only one space.
1497 # Warning, it changes the string and should only be called if the string is
1498 # plain text
1499 sub canonize {
1500 my $text=shift;
1501 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
1502 $text =~ s/^ *//s;
1503 $text =~ s/^[ \t]+/ /gm;
1504 # if ($text eq "\n"), it messed up the first string (header)
1505 $text =~ s/\n/ /gm if ($text ne "\n");
1506 $text =~ s/([.)]) +/$1 /gm;
1507 $text =~ s/([^.)]) */$1 /gm;
1508 $text =~ s/ *$//s;
1509 print STDERR ">$text<\n" if $debug{'canonize'};
1510 return $text;
1513 # wraps the string. We don't use Text::Wrap since it mangles whitespace at
1514 # the end of splited line
1515 sub wrap {
1516 my $text=shift;
1517 return "0" if ($text eq '0');
1518 my $col=shift || 76;
1519 my @lines=split(/\n/,"$text");
1520 my $res="";
1521 my $first=1;
1522 while (defined(my $line=shift @lines)) {
1523 if ($first && length($line) > $col - 10) {
1524 unshift @lines,$line;
1525 $first=0;
1526 next;
1528 if (length($line) > $col) {
1529 my $pos=rindex($line," ",$col);
1530 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
1531 $pos=rindex($line," ",$pos-1);
1533 if ($pos == -1) {
1534 # There are no spaces in the first $col chars, pick-up the
1535 # first space
1536 $pos = index($line," ");
1538 if ($pos != -1) {
1539 my $end=substr($line,$pos+1);
1540 $line=substr($line,0,$pos+1);
1541 if ($end =~ s/^( +)//) {
1542 $line .= $1;
1544 unshift @lines,$end;
1547 $first=0;
1548 $res.="$line\n";
1550 # Restore the original trailing spaces
1551 $res =~ s/\s+$//s;
1552 if ($text =~ m/(\s+)$/s) {
1553 $res .= $1;
1555 return $res;
1558 # outputs properly a '# ... ' line to be put in the po file
1559 sub format_comment {
1560 my $comment=shift;
1561 my $char=shift;
1562 my $result = "#". $char . $comment;
1563 $result =~ s/\n/\n#$char/gs;
1564 $result =~ s/^#$char$/#/gm;
1565 $result .= "\n";
1566 return $result;
1570 1;
1571 __END__
1573 =back
1575 =head1 AUTHORS
1577 Denis Barbier <barbier@linuxfr.org>
1578 Martin Quinson (mquinson#debian.org)
1580 =cut