hgbook: 8271c8891b0e tools/po4a/lib/Locale/Po4a/Po.pm

hgbook

view tools/po4a/lib/Locale/Po4a/Po.pm @ 627:8271c8891b0e

Add Chinese translation

author	Dongsheng Song <dongsheng.song@gmail.com>
date	Thu Mar 12 15:53:01 2009 +0800 (2009-03-12)
parents
children

line source

1 # Locale::Po4a::Po -- manipulation of po files

2 # $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $

3 #

4 # This program is free software; you may redistribute it and/or modify it

5 # under the terms of GPL (see COPYING).

7 ############################################################################

8 # Modules and declarations

9 ############################################################################

11 =head1 NAME

13 Locale::Po4a::Po - po file manipulation module

15 =head1 SYNOPSIS

17 use Locale::Po4a::Po;

18 my $pofile=Locale::Po4a::Po->new();

20 # Read po file

21 $pofile->read('file.po');

23 # Add an entry

24 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',

25 'flags' => "wrap", 'reference'=>'file.c:46');

27 # Extract a translation

28 $pofile->gettext("Hello"); # returns 'bonjour'

30 # Write back to a file

31 $pofile->write('otherfile.po');

33 =head1 DESCRIPTION

35 Locale::Po4a::Po is a module that allows you to manipulate message

36 catalogs. You can load and write from/to a file (which extension is often

37 I<po>), you can build new entries on the fly or request for the translation

38 of a string.

40 For a more complete description of message catalogs in the po format and

41 their use, please refer to the documentation of the gettext program.

43 This module is part of the PO4A project, which objective is to use po files

44 (designed at origin to ease the translation of program messages) to

45 translate everything, including documentation (man page, info manual),

46 package description, debconf templates, and everything which may benefit

47 from this.

49 =head1 OPTIONS ACCEPTED BY THIS MODULE

51 =over 4

53 =item porefs

55 This specifies the reference format. It can be one of 'none' to not produce

56 any reference, 'noline' to not specify the line number, and 'full' to

57 include complete references.

59 =back

61 =cut

63 use IO::File;

66 require Exporter;

68 package Locale::Po4a::Po;

69 use DynaLoader;

71 use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);

73 use subs qw(makespace);

74 use vars qw(@ISA @EXPORT_OK);

75 @ISA = qw(Exporter DynaLoader);

76 @EXPORT = qw(%debug);

77 @EXPORT_OK = qw(&move_po_if_needed);

79 use Locale::Po4a::TransTractor;

80 # Try to use a C extension if present.

81 eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");

83 use 5.006;

84 use strict;

85 use warnings;

87 use Carp qw(croak);

88 use File::Path; # mkdir before write

89 use File::Copy; # move

90 use POSIX qw(strftime floor);

91 use Time::Local;

93 use Encode;

95 my @known_flags=qw(wrap no-wrap c-format fuzzy);

97 our %debug=('canonize' => 0,

98 'quote' => 0,

99 'escape' => 0,

100 'encoding' => 0,

101 'filter' => 0);

102

103 =head1 Functions about whole message catalogs

104

105 =over 4

106

107 =item new()

108

109 Creates a new message catalog. If an argument is provided, it's the name of

110 a po file we should load.

111

112 =cut

113

114 sub new {

115 my ($this, $options) = (shift, shift);

116 my $class = ref($this) || $this;

117 my $self = {};

118 bless $self, $class;

119 $self->initialize($options);

120

121 my $filename = shift;

122 $self->read($filename) if defined($filename) && length($filename);

123 return $self;

124 }

125

126 # Return the numerical timezone (e.g. +0200)

127 # Neither the %z nor the %s formats of strftime are portable:

128 # '%s' is not supported on Solaris and '%z' indicates

129 # "2006-10-25 19:36E. Europe Standard Time" on MS Windows.

130 sub timezone {

131 my @g = gmtime();

132 my @l = localtime();

133

134 my $diff;

135 $diff = floor(timelocal(@l)/60 +0.5);

136 $diff -= floor(timelocal(@g)/60 +0.5);

137

138 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently

139 # in a daylight saving time zone

140 my $m = $diff%60;

141

142 return sprintf "%+03d%02d\n", $h, $m;

143 }

144

145 sub initialize {

146 my ($self, $options) = (shift, shift);

147 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();

148 chomp $date;

149 # $options = ref($options) || $options;

150

151 $self->{options}{'porefs'}= 'full';

152 $self->{options}{'msgid-bugs-address'}= undef;

153 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";

154 $self->{options}{'package-name'}= "PACKAGE";

155 $self->{options}{'package-version'}= "VERSION";

156 foreach my $opt (keys %$options) {

157 if ($options->{$opt}) {

158 die wrap_mod("po4a::po",

159 dgettext ("po4a", "Unknown option: %s"), $opt)

160 unless exists $self->{options}{$opt};

161 $self->{options}{$opt} = $options->{$opt};

162 }

163 }

164 $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||

165 die wrap_mod("po4a::po",

166 dgettext ("po4a",

167 "Invalid value for option 'porefs' ('%s' is ".

168 "not one of 'full', 'noline' or 'none')"),

169 $self->{options}{'porefs'});

170

171 $self->{po}=();

172 $self->{count}=0; # number of msgids in the PO

173 # count_doc: number of strings in the document

174 # (duplicate strings counted multiple times)

175 $self->{count_doc}=0;

176 $self->{header_comment}=

177 " SOME DESCRIPTIVE TITLE\n"

178 ." Copyright (C) YEAR ".

179 $self->{options}{'copyright-holder'}."\n"

180 ." This file is distributed under the same license ".

181 "as the ".$self->{options}{'package-name'}." package.\n"

182 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"

183 ."\n"

184 .", fuzzy";

185 # $self->header_tag="fuzzy";

186 $self->{header}=escape_text("Project-Id-Version: ".

187 $self->{options}{'package-name'}." ".

188 $self->{options}{'package-version'}."\n".

189 ((defined $self->{options}{'msgid-bugs-address'})?

190 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":

191 "").

192 "POT-Creation-Date: $date\n".

193 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".

194 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".

195 "Language-Team: LANGUAGE <LL\@li.org>\n".

196 "MIME-Version: 1.0\n".

197 "Content-Type: text/plain; charset=CHARSET\n".

198 "Content-Transfer-Encoding: ENCODING");

199

200 $self->{encoder}=find_encoding("ascii");

201

202 # To make stats about gettext hits

203 $self->stats_clear();

204 }

205

206 =item read($)

207

208 Reads a po file (which name is given as argument). Previously existing

209 entries in self are not removed, the new ones are added to the end of the

210 catalog.

211

212 =cut

213

214 sub read {

215 my $self=shift;

216 my $filename=shift

217 or croak wrap_mod("po4a::po",

218 dgettext("po4a",

219 "Please provide a non-null filename"));

220

221 my $fh;

222 if ($filename eq '-') {

223 $fh=*STDIN;

224 } else {

225 open $fh,"<$filename"

226 or croak wrap_mod("po4a::po",

227 dgettext("po4a", "Can't read from %s: %s"),

228 $filename, $!);

229 }

230

231 ## Read paragraphs line-by-line

232 my $pofile="";

233 my $textline;

234 while (defined ($textline = <$fh>)) {

235 $pofile .= $textline;

236 }

237 # close INPUT

238 # or croak (sprintf(dgettext("po4a",

239 # "Can't close %s after reading: %s"),

240 # $filename,$!)."\n");

241

242 my $linenum=0;

243

244 foreach my $msg (split (/\n\n/,$pofile)) {

245 my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer);

246 my ($msgid_plural, $msgstr_plural);

247 foreach my $line (split (/\n/,$msg)) {

248 $linenum++;

249 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment

250 $automatic .= (defined($automatic) ? "\n" : "").$1;

251

252 } elsif ($line =~ /^#: ?(.*)$/) { # reference

253 $reference .= (defined($reference) ? "\n" : "").$1;

254

255 } elsif ($line =~ /^#, ?(.*)$/) { # flags

256 $flags .= (defined($flags) ? "\n" : "").$1;

257

258 } elsif ($line =~ /^#(.*)$/) { # Translator comments

259 $comment .= (defined($comment) ? "\n" : "").($1||"");

260

261 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid

262 $buffer = $1;

263

264 } elsif ($line =~ /^msgid_plural (".*")$/) {

265 # begin of msgid_plural, end of msgid

266

267 $msgid = $buffer;

268 $buffer = $1;

269

270 } elsif ($line =~ /^msgstr (".*")$/) {

271 # begin of msgstr, end of msgid

272

273 $msgid = $buffer;

274 $buffer = "$1";

275

276 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {

277 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]

278

279 # Note: po4a cannot uses plural forms

280 # (no integer to use the plural form)

281 # * drop the msgstr[x] where x >= 2

282 # * use msgstr[0] as the translation of msgid

283 # * use msgstr[1] as the translation of msgid_plural

284

285 if ($1 eq "0") {

286 $msgid_plural = $buffer;

287 $buffer = "$2";

288 } elsif ($1 eq "1") {

289 $msgstr = $buffer;

290 $buffer = "$2";

291 } elsif ($1 eq "2") {

292 $msgstr_plural = $buffer;

293 warn wrap_ref_mod("$filename:$linenum",

294 "po4a::po",

295 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));

296 }

297 } elsif ($line =~ /^(".*")$/) {

298 # continuation of a line

299 $buffer .= "\n$1";

300

301 } else {

302 warn wrap_ref_mod("$filename:$linenum",

303 "po4a::po",

304 dgettext("po4a", "Strange line: -->%s<--"),

305 $line);

306 }

307 }

308 $linenum++;

309 if (defined $msgid_plural) {

310 $msgstr_plural=$buffer;

311

312 $msgid = unquote_text($msgid) if (defined($msgid));

313 $msgstr = unquote_text($msgstr) if (defined($msgstr));

314

315 $self->push_raw ('msgid' => $msgid,

316 'msgstr' => $msgstr,

317 'reference' => $reference,

318 'flags' => $flags,

319 'comment' => $comment,

320 'automatic' => $automatic,

321 'plural' => 0);

322

323 $msgid_plural = unquote_text($msgid_plural)

324 if (defined($msgid_plural));

325 $msgstr_plural = unquote_text($msgstr_plural)

326 if (defined($msgstr_plural));

327

328 $self->push_raw ('msgid' => $msgid_plural,

329 'msgstr' => $msgstr_plural,

330 'reference' => $reference,

331 'flags' => $flags,

332 'comment' => $comment,

333 'automatic' => $automatic,

334 'plural' => 1);

335 } else {

336 $msgstr=$buffer;

337

338 $msgid = unquote_text($msgid) if (defined($msgid));

339 $msgstr = unquote_text($msgstr) if (defined($msgstr));

340

341 $self->push_raw ('msgid' => $msgid,

342 'msgstr' => $msgstr,

343 'reference' => $reference,

344 'flags' => $flags,

345 'comment' => $comment,

346 'automatic' => $automatic);

347 }

348 }

349 }

350

351 =item write($)

352

353 Writes the current catalog to the given file.

354

355 =cut

356

357 sub write{

358 my $self=shift;

359 my $filename=shift

360 or croak dgettext("po4a","Can't write to a file without filename")."\n";

361

362 my $fh;

363 if ($filename eq '-') {

364 $fh=\*STDOUT;

365 } else {

366 # make sure the directory in which we should write the localized

367 # file exists

368 my $dir = $filename;

369 if ($dir =~ m|/|) {

370 $dir =~ s|/[^/]*$||;

371

372 File::Path::mkpath($dir, 0, 0755) # Croaks on error

373 if (length ($dir) && ! -e $dir);

374 }

375 open $fh,">$filename"

376 or croak wrap_mod("po4a::po",

377 dgettext("po4a", "Can't write to %s: %s"),

378 $filename, $!);

379 }

380

381 print $fh "".format_comment($self->{header_comment},"")

382 if defined($self->{header_comment}) && length($self->{header_comment});

383

384 print $fh "msgid \"\"\n";

385 print $fh "msgstr ".quote_text($self->{header})."\n\n";

386

387

388 my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms

389 my $first=1;

390 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>

391 ($self->{po}{"$b"}{'pos'})

392 } keys %{$self->{po}}) {

393 my $output="";

394

395 if ($first) {

396 $first=0;

397 } else {

398 $output .= "\n";

399 }

400

401 $output .= format_comment($self->{po}{$msgid}{'comment'},"")

402 if defined($self->{po}{$msgid}{'comment'})

403 && length ($self->{po}{$msgid}{'comment'});

404 if ( defined($self->{po}{$msgid}{'automatic'})

405 && length ($self->{po}{$msgid}{'automatic'})) {

406 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))

407 {

408 $output .= format_comment($comment, ". ")

409 }

410 }

411 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")

412 if defined($self->{po}{$msgid}{'type'})

413 && length ($self->{po}{$msgid}{'type'});

414 $output .= format_comment($self->{po}{$msgid}{'reference'},": ")

415 if defined($self->{po}{$msgid}{'reference'})

416 && length ($self->{po}{$msgid}{'reference'});

417 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"

418 if defined($self->{po}{$msgid}{'flags'})

419 && length ($self->{po}{$msgid}{'flags'});

420

421 if (exists $self->{po}{$msgid}{'plural'}) {

422 if ($self->{po}{$msgid}{'plural'} == 0) {

423 if ($self->get_charset =~ /^utf-8$/i) {

424 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});

425 $msgid = Encode::decode_utf8($msgid);

426 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");

427 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");

428 } else {

429 $output = "msgid ".quote_text($msgid)."\n";

430 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";

431 }

432 } elsif ($self->{po}{$msgid}{'plural'} == 1) {

433 # TODO: there may be only one plural form

434 if ($self->get_charset =~ /^utf-8$/i) {

435 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});

436 $msgid = Encode::decode_utf8($msgid);

437 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");

438 $output .= $buf_msgstr_plural;

439 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");

440 $buf_msgstr_plural = "";

441 } else {

442 $output = "msgid_plural ".quote_text($msgid)."\n";

443 $output .= $buf_msgstr_plural;

444 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";

445 }

446 } else {

447 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));

448 }

449 } else {

450 if ($self->get_charset =~ /^utf-8$/i) {

451 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});

452 $msgid = Encode::decode_utf8($msgid);

453 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");

454 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");

455 } else {

456 $output .= "msgid ".quote_text($msgid)."\n";

457 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";

458 }

459 }

460

461 print $fh $output;

462 }

463 # print STDERR "$fh";

464 # if ($filename ne '-') {

465 # close $fh

466 # or croak (sprintf(dgettext("po4a",

467 # "Can't close %s after writing: %s\n"),

468 # $filename,$!));

469 # }

470 }

471

472 =item write_if_needed($$)

473

474 Like write, but if the PO or POT file already exists, the object will be

475 written in a temporary file which will be compared with the existing file

476 to check that the update is needed (this avoids to change a POT just to

477 update a line reference or the POT-Creation-Date field).

478

479 =cut

480

481 sub move_po_if_needed {

482 my ($new_po, $old_po, $backup) = (shift, shift, shift);

483 my $diff;

484

485 if (-e $old_po) {

486 my $diff_ignore = "-I'^#:' "

487 ."-I'^\"POT-Creation-Date:' "

488 ."-I'^\"PO-Revision-Date:'";

489 $diff = qx(diff -q $diff_ignore $old_po $new_po);

490 if ( $diff eq "" ) {

491 unlink $new_po

492 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),

493 $new_po, $!);

494 # touch the old PO

495 my ($atime, $mtime) = (time,time);

496 utime $atime, $mtime, $old_po;

497 } else {

498 if ($backup) {

499 copy $old_po, $old_po."~"

500 or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."),

501 $old_po, $old_po."~", $!);

502 } else {

503 }

504 move $new_po, $old_po

505 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),

506 $new_po, $old_po, $!);

507 }

508 } else {

509 move $new_po, $old_po

510 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),

511 $new_po, $old_po, $!);

512 }

513 }

514

515 sub write_if_needed {

516 my $self=shift;

517 my $filename=shift

518 or croak dgettext("po4a","Can't write to a file without filename")."\n";

519

520 if (-e $filename) {

521 my ($tmp_filename);

522 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",

523 DIR => "/tmp",

524 OPEN => 0,

525 UNLINK => 0);

526 $self->write($tmp_filename);

527 move_po_if_needed($tmp_filename, $filename);

528 } else {

529 $self->write($filename);

530 }

531 }

532

533 =item gettextize($$)

534

535 This function produces one translated message catalog from two catalogs, an

536 original and a translation. This process is described in L<po4a(7)|po4a.7>,

537 section I<Gettextization: how does it work?>.

538

539 =cut

540

541 sub gettextize {

542 my $this = shift;

543 my $class = ref($this) || $this;

544 my ($poorig,$potrans)=(shift,shift);

545

546 my $pores=Locale::Po4a::Po->new();

547

548 my $please_fail = 0;

549 my $toobad = dgettext("po4a",

550 "\nThe gettextization failed (once again). Don't give up, ".

551 "gettextizing is a subtle art, but this is only needed once ".

552 "to convert a project to the gorgeous luxus offered by po4a ".

553 "to translators.".

554 "\nPlease refer to the po4a(7) documentation, the section ".

555 "\"HOWTO convert a pre-existing translation to po4a?\" ".

556 "contains several hints to help you in your task");

557

558 # Don't fail right now when the entry count does not match. Instead, give

559 # it a try so that the user can see where we fail (which is probably where

560 # the problem is).

561 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {

562 warn wrap_mod("po4a gettextize", dgettext("po4a",

563 "Original has more strings than the translation (%d>%d). ".

564 "Please fix it by editing the translated version to add ".

565 "some dummy entry."),

566 $poorig->count_entries_doc(),

567 $potrans->count_entries_doc());

568 $please_fail = 1;

569 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {

570 warn wrap_mod("po4a gettextize", dgettext("po4a",

571 "Original has less strings than the translation (%d<%d). ".

572 "Please fix it by removing the extra entry from the ".

573 "translated file. You may need an addendum (cf po4a(7)) ".

574 "to reput the chunk in place after gettextization. A ".

575 "possible cause is that a text duplicated in the original ".

576 "is not translated the same way each time. Remove one of ".

577 "the translations, and you're fine."),

578 $poorig->count_entries_doc(),

579 $potrans->count_entries_doc());

580 $please_fail = 1;

581 }

582

583 if ( $poorig->get_charset =~ /^utf-8$/i ) {

584 $potrans->to_utf8;

585 $pores->set_charset("utf-8");

586 } else {

587 if ($potrans->get_charset eq "CHARSET") {

588 $pores->set_charset("ascii");

589 } else {

590 $pores->set_charset($potrans->get_charset);

591 }

592 }

593 print "Po character sets:\n".

594 " original=".$poorig->get_charset."\n".

595 " translated=".$potrans->get_charset."\n".

596 " result=".$pores->get_charset."\n"

597 if $debug{'encoding'};

598

599 for (my ($o,$t)=(0,0) ;

600 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();

601 $o++,$t++) {

602 #

603 # Extract some informations

604

605 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));

606 # print STDERR "Matches [[$orig]]<<$trans>>\n";

607

608 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},

609 $potrans->{po}{$trans}{'reference'});

610 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},

611 $potrans->{po}{$trans}{'type'});

612

613 #

614 # Make sure the type of both string exist

615 #

616 die wrap_mod("po4a gettextize",

617 "Internal error: type of original string number %s ".

618 "isn't provided", $o)

619 if ($typeorig eq '');

620

621 die wrap_mod("po4a gettextize",

622 "Internal error: type of translated string number %s ".

623 "isn't provided", $o)

624 if ($typetrans eq '');

625

626 #

627 # Make sure both type are the same

628 #

629 if ($typeorig ne $typetrans){

630 $pores->write("gettextization.failed.po");

631 die wrap_msg(dgettext("po4a",

632 "po4a gettextization: Structure disparity between ".

633 "original and translated files:\n".

634 "msgid (at %s) is of type '%s' while\n".

635 "msgstr (at %s) is of type '%s'.\n".

636 "Original text: %s\n".

637 "Translated text: %s\n".

638 "(result so far dumped to gettextization.failed.po)").

639 "%s",

640 $reforig, $typeorig,

641 $reftrans, $typetrans,

642 $orig,

643 $trans,

644 $toobad);

645 }

646

647 #

648 # Push the entry

649 #

650 my $flags;

651 if (defined $poorig->{po}{$orig}{'flags'}) {

652 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";

653 } else {

654 $flags = "fuzzy";

655 }

656 $pores->push_raw('msgid' => $orig,

657 'msgstr' => $trans,

658 'flags' => $flags,

659 'type' => $typeorig,

660 'reference' => $reforig,

661 'conflict' => 1,

662 'transref' => $potrans->{po}{$trans}{'reference'})

663 unless (defined($pores->{po}{$orig})

664 and ($pores->{po}{$orig}{'msgstr'} eq $trans))

665 # FIXME: maybe we should be smarter about what reference should be

666 # sent to push_raw.

667 }

668

669 # make sure we return a useful error message when entry count differ

670 die "$toobad\n" if $please_fail;

671

672 return $pores;

673 }

674

675 =item filter($)

676

677 This function extracts a catalog from an existing one. Only the entries having

678 a reference in the given file will be placed in the resulting catalog.

679

680 This function parses its argument, converts it to a perl function definition,

681 eval this definition and filter the fields for which this function returns

682 true.

683

684 I love perl sometimes ;)

685

686 =cut

687

688 sub filter {

689 my $self=shift;

690 our $filter=shift;

691

692 my $res;

693 $res = Locale::Po4a::Po->new();

694

695 # Parse the filter

696 our $code="sub apply { return ";

697 our $pos=0;

698 our $length = length $filter;

699

700 # explode chars to parts. How to subscript a string in Perl?

701 our @filter = split(//,$filter);

702

703 sub gloups {

704 my $fmt=shift;

705 my $space = "";

706 for (1..$pos){

707 $space .= ' ';

708 }

709 die wrap_msg("$fmt\n$filter\n$space^ HERE");

710 }

711 sub showmethecode {

712 return unless $debug{'filter'};

713 my $fmt=shift;

714 my $space="";

715 for (1..$pos){

716 $space .= ' ';

717 }

718 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";

719 }

720

721 # I dream of a lex in perl :-/

722 sub parse_expression {

723 showmethecode("Begin expression")

724 if $debug{'filter'};

725

726 gloups("Begin of expression expected, got '%s'",$filter[$pos])

727 unless ($filter[$pos] eq '(');

728 $pos ++; # pass the '('

729 if ($filter[$pos] eq '&') {

730 # AND

731 $pos++;

732 showmethecode("Begin of AND")

733 if $debug{'filter'};

734 $code .= "(";

735 while (1) {

736 gloups ("Unfinished AND statement.")

737 if ($pos == $length);

738 parse_expression();

739 if ($filter[$pos] eq '(') {

740 $code .= " && ";

741 } elsif ($filter[$pos] eq ')') {

742 last; # do not eat that char

743 } else {

744 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);

745 }

746 }

747 $code .= ")";

748 } elsif ($filter[$pos] eq '|') {

749 # OR

750 $pos++;

751 $code .= "(";

752 while (1) {

753 gloups("Unfinished OR statement.")

754 if ($pos == $length);

755 parse_expression();

756 if ($filter[$pos] eq '(') {

757 $code .= " || ";

758 } elsif ($filter[$pos] eq ')') {

759 last; # do not eat that char

760 } else {

761 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);

762 }

763 }

764 $code .= ")";

765 } elsif ($filter[$pos] eq '!') {

766 # NOT

767 $pos++;

768 $code .= "(!";

769 gloups("Missing sub-expression in NOT statement.")

770 if ($pos == $length);

771 parse_expression();

772 $code .= ")";

773 } else {

774 # must be an equal. Let's get field and argument

775 my ($field,$arg,$done);

776 $field = substr($filter,$pos);

777 gloups("EQ statement contains no '=' or invalid field name")

778 unless ($field =~ /([a-z]*)=/i);

779 $field = lc($1);

780 $pos += (length $field) + 1;

781

782 # check that we've got a valid field name,

783 # and the number it referes to

784 # DO NOT CHANGE THE ORDER

785 my @names=qw(msgid msgstr reference flags comment automatic);

786 my $fieldpos;

787 for ($fieldpos = 0;

788 $fieldpos < scalar @names && $field ne $names[$fieldpos];

789 $fieldpos++) {}

790 gloups("Invalid field name: %s",$field)

791 if $fieldpos == scalar @names; # not found

792

793 # Now, get the argument value. It has to be between quotes,

794 # which can be escaped

795 # We point right on the first char of the argument

796 # (first quote already eaten)

797 my $escaped = 0;

798 my $quoted = 0;

799 if ($filter[$pos] eq '"') {

800 $pos++;

801 $quoted = 1;

802 }

803 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")

804 if $debug{'filter'};

805

806 while (!$done) {

807 gloups("Unfinished EQ argument.")

808 if ($pos == $length);

809

810 if ($quoted) {

811 if ($filter[$pos] eq '\\') {

812 if ($escaped) {

813 $arg .= '\\';

814 $escaped = 0;

815 } else {

816 $escaped = 1;

817 }

818 } elsif ($escaped) {

819 if ($filter[$pos] eq '"') {

820 $arg .= '"';

821 $escaped = 0;

822 } else {

823 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);

824 }

825 } else {

826 if ($filter[$pos] eq '"') {

827 $done = 1;

828 } else {

829 $arg .= $filter[$pos];

830 }

831 }

832 } else {

833 if ($filter[$pos] eq ')') {

834 # counter the next ++ since we don't want to eat

835 # this char

836 $pos--;

837 $done = 1;

838 } else {

839 $arg .= $filter[$pos];

840 }

841 }

842 $pos++;

843 }

844 # and now, add the code to check this equality

845 $code .= "(\$_[$fieldpos] =~ m/$arg/)";

846

847 }

848 showmethecode("End of expression")

849 if $debug{'filter'};

850 gloups("Unfinished statement.")

851 if ($pos == $length);

852 gloups("End of expression expected, got '%s'",$filter[$pos])

853 unless ($filter[$pos] eq ')');

854 $pos++;

855 }

856 # And now, launch the beast, finish the function and use eval

857 # to construct this function.

858 # Ok, the lack of lexer is a fair price for the eval ;)

859 parse_expression();

860 gloups("Garbage at the end of the expression")

861 if ($pos != $length);

862 $code .= "; }";

863 print STDERR "CODE = $code\n"

864 if $debug{'filter'};

865 eval $code;

866 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)

867 if $@;

868

869 for (my $cpt=(0) ;

870 $cpt<$self->count_entries();

871 $cpt++) {

872

873 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic);

874

875 $msgid = $self->msgid($cpt);

876 $ref=$self->{po}{$msgid}{'reference'};

877

878 $msgstr= $self->{po}{$msgid}{'msgstr'};

879 $flags = $self->{po}{$msgid}{'flags'};

880 $type = $self->{po}{$msgid}{'type'};

881 $comment = $self->{po}{$msgid}{'comment'};

882 $automatic = $self->{po}{$msgid}{'automatic'};

883

884 # DO NOT CHANGE THE ORDER

885 $res->push_raw('msgid' => $msgid,

886 'msgstr' => $msgstr,

887 'flags' => $flags,

888 'type' => $type,

889 'reference' => $ref,

890 'comment' => $comment,

891 'automatic' => $automatic)

892 if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic));

893 }

894 # delete the apply subroutine

895 # otherwise it will be redefined.

896 undef &apply;

897 return $res;

898 }

899

900 =item to_utf8()

901

902 Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not

903 specified in the po file ("CHARSET" value), or if it's already utf-8 or

904 ascii.

905

906 =cut

907

908 sub to_utf8 {

909 my $this = shift;

910 my $charset = $this->get_charset();

911

912 unless ($charset eq "CHARSET" or

913 $charset =~ /^ascii$/i or

914 $charset =~ /^utf-8$/i) {

915 foreach my $msgid ( keys %{$this->{po}} ) {

916 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");

917 }

918 $this->set_charset("utf-8");

919 }

920 }

921

922 =back

923

924 =head1 Functions to use a message catalog for translations

925

926 =over 4

927

928 =item gettext($%)

929

930 Request the translation of the string given as argument in the current catalog.

931 The function returns the original (untranslated) string if the string was not

932 found.

933

934 After the string to translate, you can pass a hash of extra

935 arguments. Here are the valid entries:

936

937 =over

938

939 =item wrap

940

941 boolean indicating whether we can consider that whitespaces in string are

942 not important. If yes, the function canonizes the string before looking for

943 a translation, and wraps the result.

944

945 =item wrapcol

946

947 The column at which we should wrap (default: 76).

948

949 =back

950

951 =cut

952

953 sub gettext {

954 my $self=shift;

955 my $text=shift;

956 my (%opt)=@_;

957 my $res;

958

959 return "" unless defined($text) && length($text); # Avoid returning the header.

960 my $validoption="reference wrap wrapcol";

961 my %validoption;

962

963 map { $validoption{$_}=1 } (split(/ /,$validoption));

964 foreach (keys %opt) {

965 Carp::confess "internal error: unknown arg $_.\n".

966 "Here are the valid options: $validoption.\n"

967 unless $validoption{$_};

968 }

969

970 $text=canonize($text)

971 if ($opt{'wrap'});

972

973 my $esc_text=escape_text($text);

974

975 $self->{gettextqueries}++;

976

977 if ( defined $self->{po}{$esc_text}

978 and defined $self->{po}{$esc_text}{'msgstr'}

979 and length $self->{po}{$esc_text}{'msgstr'}

980 and ( not defined $self->{po}{$esc_text}{'flags'}

981 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {

982

983 $self->{gettexthits}++;

984 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});

985 if (defined $self->{po}{$esc_text}{'plural'}) {

986 if ($self->{po}{$esc_text}{'plural'} eq "0") {

987 warn wrap_mod("po4a gettextize", dgettext("po4a",

988 "'%s' is the singular form of a message, ".

989 "po4a will use the msgstr[0] translation (%s)."),

990 $esc_text, $res);

991 } else {

992 warn wrap_mod("po4a gettextize", dgettext("po4a",

993 "'%s' is the plural form of a message, ".

994 "po4a will use the msgstr[1] translation (%s)."),

995 $esc_text, $res);

996 }

997 }

998 } else {

999 $res = $text;

1000 }

1001

1002 if ($opt{'wrap'}) {

1003 if ($self->get_charset =~ /^utf-8$/i) {

1004 $res=Encode::decode_utf8($res);

1005 $res=wrap ($res, $opt{'wrapcol'} || 76);

1006 $res=Encode::encode_utf8($res);

1007 } else {

1008 $res=wrap ($res, $opt{'wrapcol'} || 76);

1009 }

1010 }

1011 # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";

1012 return $res;

1013 }

1014

1015 =item stats_get()

1016

1017 Returns statistics about the hit ratio of gettext since the last time that

1018 stats_clear() was called. Please note that it's not the same

1019 statistics than the one printed by msgfmt --statistic. Here, it's statistics

1020 about recent usage of the po file, while msgfmt reports the status of the

1021 file. Example of use:

1022

1023 [some use of the po file to translate stuff]

1024

1025 ($percent,$hit,$queries) = $pofile->stats_get();

1026 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";

1027

1028 =cut

1029

1030 sub stats_get() {

1031 my $self=shift;

1032 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});

1033 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);

1034

1035 # $p =~ s/\.00//;

1036 # $p =~ s/(\..)0/$1/;

1037

1038 return ( $p,$h,$q );

1039 }

1040

1041 =item stats_clear()

1042

1043 Clears the statistics about gettext hits.

1044

1045 =cut

1046

1047 sub stats_clear {

1048 my $self = shift;

1049 $self->{gettextqueries} = 0;

1050 $self->{gettexthits} = 0;

1051 }

1052

1053 =back

1054

1055 =head1 Functions to build a message catalog

1056

1057 =over 4

1058

1059 =item push(%)

1060

1061 Push a new entry at the end of the current catalog. The arguments should

1062 form a hash table. The valid keys are:

1063

1064 =over 4

1065

1066 =item msgid

1067

1068 the string in original language.

1069

1070 =item msgstr

1071

1072 the translation.

1073

1074 =item reference

1075

1076 an indication of where this string was found. Example: file.c:46 (meaning

1077 in 'file.c' at line 46). It can be a space-separated list in case of

1078 multiple occurrences.

1079

1080 =item comment

1081

1082 a comment added here manually (by the translators). The format here is free.

1083

1084 =item automatic

1085

1086 a comment which was automatically added by the string extraction

1087 program. See the I<--add-comments> option of the B<xgettext> program for

1088 more information.

1089

1090 =item flags

1091

1092 space-separated list of all defined flags for this entry.

1093

1094 Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text,

1095 smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text,

1096 tcl-text, wrap, no-wrap and fuzzy.

1097

1098 See the gettext documentation for their meaning.

1099

1100 =item type

1101

1102 This is mostly an internal argument: it is used while gettextizing

1103 documents. The idea here is to parse both the original and the translation

1104 into a po object, and merge them, using one's msgid as msgid and the

1105 other's msgid as msgstr. To make sure that things get ok, each msgid in po

1106 objects are given a type, based on their structure (like "chapt", "sect1",

1107 "p" and so on in docbook). If the types of strings are not the same, that

1108 means that both files do not share the same structure, and the process

1109 reports an error.

1110

1111 This information is written as automatic comment in the po file since this

1112 gives to translators some context about the strings to translate.

1113

1114 =item wrap

1115

1116 boolean indicating whether whitespaces can be mangled in cosmetic

1117 reformattings. If true, the string is canonized before use.

1118

1119 This information is written to the po file using the 'wrap' or 'no-wrap' flag.

1120

1121 =item wrapcol

1122

1123 The column at which we should wrap (default: 76).

1124

1125 This information is not written to the po file.

1126

1127 =back

1128

1129 =cut

1130

1131 sub push {

1132 my $self=shift;

1133 my %entry=@_;

1134

1135 my $validoption="wrap wrapcol type msgid msgstr automatic flags reference";

1136 my %validoption;

1137

1138 map { $validoption{$_}=1 } (split(/ /,$validoption));

1139 foreach (keys %entry) {

1140 Carp::confess "internal error: unknown arg $_.\n".

1141 "Here are the valid options: $validoption.\n"

1142 unless $validoption{$_};

1143 }

1144

1145 unless ($entry{'wrap'}) {

1146 $entry{'flags'} .= " no-wrap";

1147 }

1148 if (defined ($entry{'msgid'})) {

1149 $entry{'msgid'} = canonize($entry{'msgid'})

1150 if ($entry{'wrap'});

1151

1152 $entry{'msgid'} = escape_text($entry{'msgid'});

1153 }

1154 if (defined ($entry{'msgstr'})) {

1155 $entry{'msgstr'} = canonize($entry{'msgstr'})

1156 if ($entry{'wrap'});

1157

1158 $entry{'msgstr'} = escape_text($entry{'msgstr'});

1159 }

1160

1161 $self->push_raw(%entry);

1162 }

1163

1164 # The same as push(), but assuming that msgid and msgstr are already escaped

1165 sub push_raw {

1166 my $self=shift;

1167 my %entry=@_;

1168 my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)=

1169 ($entry{'msgid'},$entry{'msgstr'},

1170 $entry{'reference'},$entry{'comment'},$entry{'automatic'},

1171 $entry{'flags'},$entry{'type'},$entry{'transref'});

1172 my $keep_conflict = $entry{'conflict'};

1173

1174 # print STDERR "Push_raw\n";

1175 # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;

1176 # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;

1177 # Carp::cluck " flags=$flags\n" if $flags;

1178

1179 return unless defined($entry{'msgid'});

1180

1181 #no msgid => header definition

1182 unless (length($entry{'msgid'})) {

1183 # if (defined($self->{header}) && $self->{header} =~ /\S/) {

1184 # warn dgettext("po4a","Redefinition of the header. ".

1185 # "The old one will be discarded\n");

1186 # } FIXME: do that iff the header isn't the default one.

1187 $self->{header}=$msgstr;

1188 $self->{header_comment}=$comment;

1189 my $charset = $self->get_charset;

1190 if ($charset ne "CHARSET") {

1191 $self->{encoder}=find_encoding($charset);

1192 } else {

1193 $self->{encoder}=find_encoding("ascii");

1194 }

1195 return;

1196 }

1197

1198 if ($self->{options}{'porefs'} eq "none") {

1199 $reference = "";

1200 } elsif ($self->{options}{'porefs'} eq "noline") {

1201 $reference =~ s/:[0-9]*/:1/g;

1202 }

1203

1204 if (defined($self->{po}{$msgid})) {

1205 warn wrap_mod("po4a::po",

1206 dgettext("po4a","msgid defined twice: %s"),

1207 $msgid)

1208 if (0); # FIXME: put a verbose stuff

1209 if ( defined $msgstr

1210 and defined $self->{po}{$msgid}{'msgstr'}

1211 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {

1212 my $txt=quote_text($msgid);

1213 my ($first,$second)=

1214 (format_comment(". ",$self->{po}{$msgid}{'reference'}).

1215 quote_text($self->{po}{$msgid}{'msgstr'}),

1216

1217 format_comment(". ",$reference).

1218 quote_text($msgstr));

1219

1220 if ($keep_conflict) {

1221 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {

1222 $msgstr = $self->{po}{$msgid}{'msgstr'}.

1223 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".

1224 $msgstr;

1225 } else {

1226 $msgstr = "#-#-#-#-# ".

1227 $self->{po}{$msgid}{'transref'}.

1228 " #-#-#-#-#\\n".

1229 $self->{po}{$msgid}{'msgstr'}."\\n".

1230 "#-#-#-#-# $transref #-#-#-#-#\\n".

1231 $msgstr;

1232 }

1233 # Every msgid will have the same list of references.

1234 # Only keep the last list.

1235 $self->{po}{$msgid}{'reference'} = "";

1236 } else {

1237 warn wrap_msg(dgettext("po4a",

1238 "Translations don't match for:\n".

1239 "%s\n".

1240 "-->First translation:\n".

1241 "%s\n".

1242 " Second translation:\n".

1243 "%s\n".

1244 " Old translation discarded."),

1245 $txt,$first,$second);

1246 }

1247 }

1248 }

1249 if (defined $transref) {

1250 $self->{po}{$msgid}{'transref'} = $transref;

1251 }

1252 if (defined $reference) {

1253 if (defined $self->{po}{$msgid}{'reference'}) {

1254 $self->{po}{$msgid}{'reference'} .= " ".$reference;

1255 } else {

1256 $self->{po}{$msgid}{'reference'} = $reference;

1257 }

1258 }

1259 $self->{po}{$msgid}{'msgstr'} = $msgstr;

1260 $self->{po}{$msgid}{'comment'} = $comment;

1261 $self->{po}{$msgid}{'automatic'} = $automatic;

1262 if (defined($self->{po}{$msgid}{'pos_doc'})) {

1263 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;

1264 } else {

1265 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;

1266 }

1267 unless (defined($self->{po}{$msgid}{'pos'})) {

1268 $self->{po}{$msgid}{'pos'} = $self->{count}++;

1269 }

1270 $self->{po}{$msgid}{'type'} = $type;

1271 $self->{po}{$msgid}{'plural'} = $entry{'plural'}

1272 if defined $entry{'plural'};

1273

1274 if (defined($flags)) {

1275 $flags = " $flags ";

1276 $flags =~ s/,/ /g;

1277 foreach my $flag (@known_flags) {

1278 if ($flags =~ /\s$flag\s/) { # if flag to be set

1279 unless ( defined($self->{po}{$msgid}{'flags'})

1280 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {

1281 # flag not already set

1282 if (defined $self->{po}{$msgid}{'flags'}) {

1283 $self->{po}{$msgid}{'flags'} .= " ".$flag;

1284 } else {

1285 $self->{po}{$msgid}{'flags'} = $flag;

1286 }

1287 }

1288 }

1289 }

1290 }

1291 # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";

1292

1293 }

1294

1295 =back

1296

1297 =head1 Miscellaneous functions

1298

1299 =over 4

1300

1301 =item count_entries()

1302

1303 Returns the number of entries in the catalog (without the header).

1304

1305 =cut

1306

1307 sub count_entries($) {

1308 my $self=shift;

1309 return $self->{count};

1310 }

1311

1312 =item count_entries_doc()

1313

1314 Returns the number of entries in document. If a string appears multiple times

1315 in the document, it will be counted multiple times

1316

1317 =cut

1318

1319 sub count_entries_doc($) {

1320 my $self=shift;

1321 return $self->{count_doc};

1322 }

1323

1324 =item msgid($)

1325

1326 Returns the msgid of the given number.

1327

1328 =cut

1329

1330 sub msgid($$) {

1331 my $self=shift;

1332 my $num=shift;

1333

1334 foreach my $msgid ( keys %{$self->{po}} ) {

1335 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);

1336 }

1337 return undef;

1338 }

1339

1340 =item msgid_doc($)

1341

1342 Returns the msgid with the given position in the document.

1343

1344 =cut

1345

1346 sub msgid_doc($$) {

1347 my $self=shift;

1348 my $num=shift;

1349

1350 foreach my $msgid ( keys %{$self->{po}} ) {

1351 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {

1352 return $msgid if ($pos eq $num);

1353 }

1354 }

1355 return undef;

1356 }

1357

1358 =item get_charset()

1359

1360 Returns the character set specified in the po header. If it hasn't been

1361 set, it will return "CHARSET".

1362

1363 =cut

1364

1365 sub get_charset() {

1366 my $self=shift;

1367

1368 $self->{header} =~ /charset=(.*?)[\s\\]/;

1369

1370 if (defined $1) {

1371 return $1;

1372 } else {

1373 return "CHARSET";

1374 }

1375 }

1376

1377 =item set_charset($)

1378

1379 This sets the character set of the po header to the value specified in its

1380 first argument. If you never call this function (and no file with a specified

1381 character set is read), the default value is left to "CHARSET". This value

1382 doesn't change the behavior of this module, it's just used to fill that field

1383 in the header, and to return it in get_charset().

1384

1385 =cut

1386

1387 sub set_charset() {

1388 my $self=shift;

1389

1390 my ($newchar,$oldchar);

1391 $newchar = shift;

1392 $oldchar = $self->get_charset();

1393

1394 $self->{header} =~ s/$oldchar/$newchar/;

1395 $self->{encoder}=find_encoding($newchar);

1396 }

1397

1398 #----[ helper functions ]---------------------------------------------------

1399

1400 # transforme the string from its po file representation to the form which

1401 # should be used to print it

1402 sub unescape_text {

1403 my $text = shift;

1404

1405 print STDERR "\nunescape [$text]====" if $debug{'escape'};

1406 $text = join("",split(/\n/,$text));

1407 $text =~ s/\\"/"/g;

1408 # unescape newlines

1409 # NOTE on \G:

1410 # The following regular expression introduce newlines.

1411 # Thus, ^ doesn't match all beginnings of lines.

1412 # \G is a zero-width assertion that matches the position

1413 # of the previous substitution with s///g. As every

1414 # substitution ends by a newline, it always matches a

1415 # position just after a newline.

1416 $text =~ s/( # $1:

1417 (\G|[^\\]) # beginning of the line or any char

1418 # different from '\'

1419 (\\\\)* # followed by any even number of '\'

1420 )\\n # and followed by an escaped newline

1421 /$1\n/sgx; # single string, match globally, allow comments

1422 # unescape tabulations

1423 $text =~ s/( # $1:

1424 (\G|[^\\])# beginning of the line or any char

1425 # different from '\'

1426 (\\\\)* # followed by any even number of '\'

1427 )\\t # and followed by an escaped tabulation

1428 /$1\t/mgx; # multilines string, match globally, allow comments

1429 # and unescape the escape character

1430 $text =~ s/\\\\/\\/g;

1431 print STDERR ">$text<\n" if $debug{'escape'};

1432

1433 return $text;

1434 }

1435

1436 # transform the string to its representation as it should be written in po

1437 # files

1438 sub escape_text {

1439 my $text = shift;

1440

1441 print STDERR "\nescape [$text]====" if $debug{'escape'};

1442 $text =~ s/\\/\\\\/g;

1443 $text =~ s/"/\\"/g;

1444 $text =~ s/\n/\\n/g;

1445 $text =~ s/\t/\\t/g;

1446 print STDERR ">$text<\n" if $debug{'escape'};

1447

1448 return $text;

1449 }

1450

1451 # put quotes around the string on each lines (without escaping it)

1452 # It does also normalize the text (ie, make sure its representation is wraped

1453 # on the 80th char, but without changing the meaning of the string)

1454 sub quote_text {

1455 my $string = shift;

1456

1457 return '""' unless defined($string) && length($string);

1458

1459 print STDERR "\nquote [$string]====" if $debug{'quote'};

1460 # break lines on newlines, if any

1461 # see unescape_text for an explanation on \G

1462 $string =~ s/( # $1:

1463 (\G|[^\\]) # beginning of the line or any char

1464 # different from '\'

1465 (\\\\)* # followed by any even number of '\'

1466 \\n) # and followed by an escaped newline

1467 /$1\n/sgx; # single string, match globally, allow comments

1468 $string = wrap($string);

1469 my @string = split(/\n/,$string);

1470 $string = join ("\"\n\"",@string);

1471 $string = "\"$string\"";

1472 if (scalar @string > 1 && $string[0] ne '') {

1473 $string = "\"\"\n".$string;

1474 }

1475

1476 print STDERR ">$string<\n" if $debug{'quote'};

1477 return $string;

1478 }

1479

1480 # undo the work of the quote_text function

1481 sub unquote_text {

1482 my $string = shift;

1483 print STDERR "\nunquote [$string]====" if $debug{'quote'};

1484 $string =~ s/^""\\n//s;

1485 $string =~ s/^"(.*)"$/$1/s;

1486 $string =~ s/"\n"//gm;

1487 # Note: an even number of '\' could precede \\n, but I could not build a

1488 # document to test this

1489 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;

1490 $string =~ s|!!DUMMYPOPM!!|\\n|gm;

1491 print STDERR ">$string<\n" if $debug{'quote'};

1492 return $string;

1493 }

1494

1495 # canonize the string: write it on only one line, changing consecutive

1496 # whitespace to only one space.

1497 # Warning, it changes the string and should only be called if the string is

1498 # plain text

1499 sub canonize {

1500 my $text=shift;

1501 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};

1502 $text =~ s/^ *//s;

1503 $text =~ s/^[ \t]+/ /gm;

1504 # if ($text eq "\n"), it messed up the first string (header)

1505 $text =~ s/\n/ /gm if ($text ne "\n");

1506 $text =~ s/([.)]) +/$1 /gm;

1507 $text =~ s/([^.)]) */$1 /gm;

1508 $text =~ s/ *$//s;

1509 print STDERR ">$text<\n" if $debug{'canonize'};

1510 return $text;

1511 }

1512

1513 # wraps the string. We don't use Text::Wrap since it mangles whitespace at

1514 # the end of splited line

1515 sub wrap {

1516 my $text=shift;

1517 return "0" if ($text eq '0');

1518 my $col=shift || 76;

1519 my @lines=split(/\n/,"$text");

1520 my $res="";

1521 my $first=1;

1522 while (defined(my $line=shift @lines)) {

1523 if ($first && length($line) > $col - 10) {

1524 unshift @lines,$line;

1525 $first=0;

1526 next;

1527 }

1528 if (length($line) > $col) {

1529 my $pos=rindex($line," ",$col);

1530 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {

1531 $pos=rindex($line," ",$pos-1);

1532 }

1533 if ($pos == -1) {

1534 # There are no spaces in the first $col chars, pick-up the

1535 # first space

1536 $pos = index($line," ");

1537 }

1538 if ($pos != -1) {

1539 my $end=substr($line,$pos+1);

1540 $line=substr($line,0,$pos+1);

1541 if ($end =~ s/^( +)//) {

1542 $line .= $1;

1543 }

1544 unshift @lines,$end;

1545 }

1546 }

1547 $first=0;

1548 $res.="$line\n";

1549 }

1550 # Restore the original trailing spaces

1551 $res =~ s/\s+$//s;

1552 if ($text =~ m/(\s+)$/s) {

1553 $res .= $1;

1554 }

1555 return $res;

1556 }

1557

1558 # outputs properly a '# ... ' line to be put in the po file

1559 sub format_comment {

1560 my $comment=shift;

1561 my $char=shift;

1562 my $result = "#". $char . $comment;

1563 $result =~ s/\n/\n#$char/gs;

1564 $result =~ s/^#$char$/#/gm;

1565 $result .= "\n";

1566 return $result;

1567 }

1568

1569

1570 1;

1571 __END__

1572

1573 =back

1574

1575 =head1 AUTHORS

1576

1577 Denis Barbier <barbier@linuxfr.org>

1578 Martin Quinson (mquinson#debian.org)

1579

1580 =cut