hgbook

annotate tools/po4a/lib/Locale/Po4a/Po.pm @ 653:6b1577ef5135

Update Chinese translation
author Dongsheng Song <dongsheng.song@gmail.com>
date Fri Mar 20 17:17:55 2009 +0800 (2009-03-20)
parents
children
rev   line source
dongsheng@623 1 # Locale::Po4a::Po -- manipulation of po files
dongsheng@623 2 # $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $
dongsheng@623 3 #
dongsheng@623 4 # This program is free software; you may redistribute it and/or modify it
dongsheng@623 5 # under the terms of GPL (see COPYING).
dongsheng@623 6
dongsheng@623 7 ############################################################################
dongsheng@623 8 # Modules and declarations
dongsheng@623 9 ############################################################################
dongsheng@623 10
dongsheng@623 11 =head1 NAME
dongsheng@623 12
dongsheng@623 13 Locale::Po4a::Po - po file manipulation module
dongsheng@623 14
dongsheng@623 15 =head1 SYNOPSIS
dongsheng@623 16
dongsheng@623 17 use Locale::Po4a::Po;
dongsheng@623 18 my $pofile=Locale::Po4a::Po->new();
dongsheng@623 19
dongsheng@623 20 # Read po file
dongsheng@623 21 $pofile->read('file.po');
dongsheng@623 22
dongsheng@623 23 # Add an entry
dongsheng@623 24 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
dongsheng@623 25 'flags' => "wrap", 'reference'=>'file.c:46');
dongsheng@623 26
dongsheng@623 27 # Extract a translation
dongsheng@623 28 $pofile->gettext("Hello"); # returns 'bonjour'
dongsheng@623 29
dongsheng@623 30 # Write back to a file
dongsheng@623 31 $pofile->write('otherfile.po');
dongsheng@623 32
dongsheng@623 33 =head1 DESCRIPTION
dongsheng@623 34
dongsheng@623 35 Locale::Po4a::Po is a module that allows you to manipulate message
dongsheng@623 36 catalogs. You can load and write from/to a file (which extension is often
dongsheng@623 37 I<po>), you can build new entries on the fly or request for the translation
dongsheng@623 38 of a string.
dongsheng@623 39
dongsheng@623 40 For a more complete description of message catalogs in the po format and
dongsheng@623 41 their use, please refer to the documentation of the gettext program.
dongsheng@623 42
dongsheng@623 43 This module is part of the PO4A project, which objective is to use po files
dongsheng@623 44 (designed at origin to ease the translation of program messages) to
dongsheng@623 45 translate everything, including documentation (man page, info manual),
dongsheng@623 46 package description, debconf templates, and everything which may benefit
dongsheng@623 47 from this.
dongsheng@623 48
dongsheng@623 49 =head1 OPTIONS ACCEPTED BY THIS MODULE
dongsheng@623 50
dongsheng@623 51 =over 4
dongsheng@623 52
dongsheng@623 53 =item porefs
dongsheng@623 54
dongsheng@623 55 This specifies the reference format. It can be one of 'none' to not produce
dongsheng@623 56 any reference, 'noline' to not specify the line number, and 'full' to
dongsheng@623 57 include complete references.
dongsheng@623 58
dongsheng@623 59 =back
dongsheng@623 60
dongsheng@623 61 =cut
dongsheng@623 62
dongsheng@623 63 use IO::File;
dongsheng@623 64
dongsheng@623 65
dongsheng@623 66 require Exporter;
dongsheng@623 67
dongsheng@623 68 package Locale::Po4a::Po;
dongsheng@623 69 use DynaLoader;
dongsheng@623 70
dongsheng@623 71 use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
dongsheng@623 72
dongsheng@623 73 use subs qw(makespace);
dongsheng@623 74 use vars qw(@ISA @EXPORT_OK);
dongsheng@623 75 @ISA = qw(Exporter DynaLoader);
dongsheng@623 76 @EXPORT = qw(%debug);
dongsheng@623 77 @EXPORT_OK = qw(&move_po_if_needed);
dongsheng@623 78
dongsheng@623 79 use Locale::Po4a::TransTractor;
dongsheng@623 80 # Try to use a C extension if present.
dongsheng@623 81 eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
dongsheng@623 82
dongsheng@623 83 use 5.006;
dongsheng@623 84 use strict;
dongsheng@623 85 use warnings;
dongsheng@623 86
dongsheng@623 87 use Carp qw(croak);
dongsheng@623 88 use File::Path; # mkdir before write
dongsheng@623 89 use File::Copy; # move
dongsheng@623 90 use POSIX qw(strftime floor);
dongsheng@623 91 use Time::Local;
dongsheng@623 92
dongsheng@623 93 use Encode;
dongsheng@623 94
dongsheng@623 95 my @known_flags=qw(wrap no-wrap c-format fuzzy);
dongsheng@623 96
dongsheng@623 97 our %debug=('canonize' => 0,
dongsheng@623 98 'quote' => 0,
dongsheng@623 99 'escape' => 0,
dongsheng@623 100 'encoding' => 0,
dongsheng@623 101 'filter' => 0);
dongsheng@623 102
dongsheng@623 103 =head1 Functions about whole message catalogs
dongsheng@623 104
dongsheng@623 105 =over 4
dongsheng@623 106
dongsheng@623 107 =item new()
dongsheng@623 108
dongsheng@623 109 Creates a new message catalog. If an argument is provided, it's the name of
dongsheng@623 110 a po file we should load.
dongsheng@623 111
dongsheng@623 112 =cut
dongsheng@623 113
dongsheng@623 114 sub new {
dongsheng@623 115 my ($this, $options) = (shift, shift);
dongsheng@623 116 my $class = ref($this) || $this;
dongsheng@623 117 my $self = {};
dongsheng@623 118 bless $self, $class;
dongsheng@623 119 $self->initialize($options);
dongsheng@623 120
dongsheng@623 121 my $filename = shift;
dongsheng@623 122 $self->read($filename) if defined($filename) && length($filename);
dongsheng@623 123 return $self;
dongsheng@623 124 }
dongsheng@623 125
dongsheng@623 126 # Return the numerical timezone (e.g. +0200)
dongsheng@623 127 # Neither the %z nor the %s formats of strftime are portable:
dongsheng@623 128 # '%s' is not supported on Solaris and '%z' indicates
dongsheng@623 129 # "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
dongsheng@623 130 sub timezone {
dongsheng@623 131 my @g = gmtime();
dongsheng@623 132 my @l = localtime();
dongsheng@623 133
dongsheng@623 134 my $diff;
dongsheng@623 135 $diff = floor(timelocal(@l)/60 +0.5);
dongsheng@623 136 $diff -= floor(timelocal(@g)/60 +0.5);
dongsheng@623 137
dongsheng@623 138 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently
dongsheng@623 139 # in a daylight saving time zone
dongsheng@623 140 my $m = $diff%60;
dongsheng@623 141
dongsheng@623 142 return sprintf "%+03d%02d\n", $h, $m;
dongsheng@623 143 }
dongsheng@623 144
dongsheng@623 145 sub initialize {
dongsheng@623 146 my ($self, $options) = (shift, shift);
dongsheng@623 147 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
dongsheng@623 148 chomp $date;
dongsheng@623 149 # $options = ref($options) || $options;
dongsheng@623 150
dongsheng@623 151 $self->{options}{'porefs'}= 'full';
dongsheng@623 152 $self->{options}{'msgid-bugs-address'}= undef;
dongsheng@623 153 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
dongsheng@623 154 $self->{options}{'package-name'}= "PACKAGE";
dongsheng@623 155 $self->{options}{'package-version'}= "VERSION";
dongsheng@623 156 foreach my $opt (keys %$options) {
dongsheng@623 157 if ($options->{$opt}) {
dongsheng@623 158 die wrap_mod("po4a::po",
dongsheng@623 159 dgettext ("po4a", "Unknown option: %s"), $opt)
dongsheng@623 160 unless exists $self->{options}{$opt};
dongsheng@623 161 $self->{options}{$opt} = $options->{$opt};
dongsheng@623 162 }
dongsheng@623 163 }
dongsheng@623 164 $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||
dongsheng@623 165 die wrap_mod("po4a::po",
dongsheng@623 166 dgettext ("po4a",
dongsheng@623 167 "Invalid value for option 'porefs' ('%s' is ".
dongsheng@623 168 "not one of 'full', 'noline' or 'none')"),
dongsheng@623 169 $self->{options}{'porefs'});
dongsheng@623 170
dongsheng@623 171 $self->{po}=();
dongsheng@623 172 $self->{count}=0; # number of msgids in the PO
dongsheng@623 173 # count_doc: number of strings in the document
dongsheng@623 174 # (duplicate strings counted multiple times)
dongsheng@623 175 $self->{count_doc}=0;
dongsheng@623 176 $self->{header_comment}=
dongsheng@623 177 " SOME DESCRIPTIVE TITLE\n"
dongsheng@623 178 ." Copyright (C) YEAR ".
dongsheng@623 179 $self->{options}{'copyright-holder'}."\n"
dongsheng@623 180 ." This file is distributed under the same license ".
dongsheng@623 181 "as the ".$self->{options}{'package-name'}." package.\n"
dongsheng@623 182 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
dongsheng@623 183 ."\n"
dongsheng@623 184 .", fuzzy";
dongsheng@623 185 # $self->header_tag="fuzzy";
dongsheng@623 186 $self->{header}=escape_text("Project-Id-Version: ".
dongsheng@623 187 $self->{options}{'package-name'}." ".
dongsheng@623 188 $self->{options}{'package-version'}."\n".
dongsheng@623 189 ((defined $self->{options}{'msgid-bugs-address'})?
dongsheng@623 190 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
dongsheng@623 191 "").
dongsheng@623 192 "POT-Creation-Date: $date\n".
dongsheng@623 193 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
dongsheng@623 194 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
dongsheng@623 195 "Language-Team: LANGUAGE <LL\@li.org>\n".
dongsheng@623 196 "MIME-Version: 1.0\n".
dongsheng@623 197 "Content-Type: text/plain; charset=CHARSET\n".
dongsheng@623 198 "Content-Transfer-Encoding: ENCODING");
dongsheng@623 199
dongsheng@623 200 $self->{encoder}=find_encoding("ascii");
dongsheng@623 201
dongsheng@623 202 # To make stats about gettext hits
dongsheng@623 203 $self->stats_clear();
dongsheng@623 204 }
dongsheng@623 205
dongsheng@623 206 =item read($)
dongsheng@623 207
dongsheng@623 208 Reads a po file (which name is given as argument). Previously existing
dongsheng@623 209 entries in self are not removed, the new ones are added to the end of the
dongsheng@623 210 catalog.
dongsheng@623 211
dongsheng@623 212 =cut
dongsheng@623 213
dongsheng@623 214 sub read {
dongsheng@623 215 my $self=shift;
dongsheng@623 216 my $filename=shift
dongsheng@623 217 or croak wrap_mod("po4a::po",
dongsheng@623 218 dgettext("po4a",
dongsheng@623 219 "Please provide a non-null filename"));
dongsheng@623 220
dongsheng@623 221 my $fh;
dongsheng@623 222 if ($filename eq '-') {
dongsheng@623 223 $fh=*STDIN;
dongsheng@623 224 } else {
dongsheng@623 225 open $fh,"<$filename"
dongsheng@623 226 or croak wrap_mod("po4a::po",
dongsheng@623 227 dgettext("po4a", "Can't read from %s: %s"),
dongsheng@623 228 $filename, $!);
dongsheng@623 229 }
dongsheng@623 230
dongsheng@623 231 ## Read paragraphs line-by-line
dongsheng@623 232 my $pofile="";
dongsheng@623 233 my $textline;
dongsheng@623 234 while (defined ($textline = <$fh>)) {
dongsheng@623 235 $pofile .= $textline;
dongsheng@623 236 }
dongsheng@623 237 # close INPUT
dongsheng@623 238 # or croak (sprintf(dgettext("po4a",
dongsheng@623 239 # "Can't close %s after reading: %s"),
dongsheng@623 240 # $filename,$!)."\n");
dongsheng@623 241
dongsheng@623 242 my $linenum=0;
dongsheng@623 243
dongsheng@623 244 foreach my $msg (split (/\n\n/,$pofile)) {
dongsheng@623 245 my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer);
dongsheng@623 246 my ($msgid_plural, $msgstr_plural);
dongsheng@623 247 foreach my $line (split (/\n/,$msg)) {
dongsheng@623 248 $linenum++;
dongsheng@623 249 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment
dongsheng@623 250 $automatic .= (defined($automatic) ? "\n" : "").$1;
dongsheng@623 251
dongsheng@623 252 } elsif ($line =~ /^#: ?(.*)$/) { # reference
dongsheng@623 253 $reference .= (defined($reference) ? "\n" : "").$1;
dongsheng@623 254
dongsheng@623 255 } elsif ($line =~ /^#, ?(.*)$/) { # flags
dongsheng@623 256 $flags .= (defined($flags) ? "\n" : "").$1;
dongsheng@623 257
dongsheng@623 258 } elsif ($line =~ /^#(.*)$/) { # Translator comments
dongsheng@623 259 $comment .= (defined($comment) ? "\n" : "").($1||"");
dongsheng@623 260
dongsheng@623 261 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
dongsheng@623 262 $buffer = $1;
dongsheng@623 263
dongsheng@623 264 } elsif ($line =~ /^msgid_plural (".*")$/) {
dongsheng@623 265 # begin of msgid_plural, end of msgid
dongsheng@623 266
dongsheng@623 267 $msgid = $buffer;
dongsheng@623 268 $buffer = $1;
dongsheng@623 269
dongsheng@623 270 } elsif ($line =~ /^msgstr (".*")$/) {
dongsheng@623 271 # begin of msgstr, end of msgid
dongsheng@623 272
dongsheng@623 273 $msgid = $buffer;
dongsheng@623 274 $buffer = "$1";
dongsheng@623 275
dongsheng@623 276 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
dongsheng@623 277 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
dongsheng@623 278
dongsheng@623 279 # Note: po4a cannot uses plural forms
dongsheng@623 280 # (no integer to use the plural form)
dongsheng@623 281 # * drop the msgstr[x] where x >= 2
dongsheng@623 282 # * use msgstr[0] as the translation of msgid
dongsheng@623 283 # * use msgstr[1] as the translation of msgid_plural
dongsheng@623 284
dongsheng@623 285 if ($1 eq "0") {
dongsheng@623 286 $msgid_plural = $buffer;
dongsheng@623 287 $buffer = "$2";
dongsheng@623 288 } elsif ($1 eq "1") {
dongsheng@623 289 $msgstr = $buffer;
dongsheng@623 290 $buffer = "$2";
dongsheng@623 291 } elsif ($1 eq "2") {
dongsheng@623 292 $msgstr_plural = $buffer;
dongsheng@623 293 warn wrap_ref_mod("$filename:$linenum",
dongsheng@623 294 "po4a::po",
dongsheng@623 295 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
dongsheng@623 296 }
dongsheng@623 297 } elsif ($line =~ /^(".*")$/) {
dongsheng@623 298 # continuation of a line
dongsheng@623 299 $buffer .= "\n$1";
dongsheng@623 300
dongsheng@623 301 } else {
dongsheng@623 302 warn wrap_ref_mod("$filename:$linenum",
dongsheng@623 303 "po4a::po",
dongsheng@623 304 dgettext("po4a", "Strange line: -->%s<--"),
dongsheng@623 305 $line);
dongsheng@623 306 }
dongsheng@623 307 }
dongsheng@623 308 $linenum++;
dongsheng@623 309 if (defined $msgid_plural) {
dongsheng@623 310 $msgstr_plural=$buffer;
dongsheng@623 311
dongsheng@623 312 $msgid = unquote_text($msgid) if (defined($msgid));
dongsheng@623 313 $msgstr = unquote_text($msgstr) if (defined($msgstr));
dongsheng@623 314
dongsheng@623 315 $self->push_raw ('msgid' => $msgid,
dongsheng@623 316 'msgstr' => $msgstr,
dongsheng@623 317 'reference' => $reference,
dongsheng@623 318 'flags' => $flags,
dongsheng@623 319 'comment' => $comment,
dongsheng@623 320 'automatic' => $automatic,
dongsheng@623 321 'plural' => 0);
dongsheng@623 322
dongsheng@623 323 $msgid_plural = unquote_text($msgid_plural)
dongsheng@623 324 if (defined($msgid_plural));
dongsheng@623 325 $msgstr_plural = unquote_text($msgstr_plural)
dongsheng@623 326 if (defined($msgstr_plural));
dongsheng@623 327
dongsheng@623 328 $self->push_raw ('msgid' => $msgid_plural,
dongsheng@623 329 'msgstr' => $msgstr_plural,
dongsheng@623 330 'reference' => $reference,
dongsheng@623 331 'flags' => $flags,
dongsheng@623 332 'comment' => $comment,
dongsheng@623 333 'automatic' => $automatic,
dongsheng@623 334 'plural' => 1);
dongsheng@623 335 } else {
dongsheng@623 336 $msgstr=$buffer;
dongsheng@623 337
dongsheng@623 338 $msgid = unquote_text($msgid) if (defined($msgid));
dongsheng@623 339 $msgstr = unquote_text($msgstr) if (defined($msgstr));
dongsheng@623 340
dongsheng@623 341 $self->push_raw ('msgid' => $msgid,
dongsheng@623 342 'msgstr' => $msgstr,
dongsheng@623 343 'reference' => $reference,
dongsheng@623 344 'flags' => $flags,
dongsheng@623 345 'comment' => $comment,
dongsheng@623 346 'automatic' => $automatic);
dongsheng@623 347 }
dongsheng@623 348 }
dongsheng@623 349 }
dongsheng@623 350
dongsheng@623 351 =item write($)
dongsheng@623 352
dongsheng@623 353 Writes the current catalog to the given file.
dongsheng@623 354
dongsheng@623 355 =cut
dongsheng@623 356
dongsheng@623 357 sub write{
dongsheng@623 358 my $self=shift;
dongsheng@623 359 my $filename=shift
dongsheng@623 360 or croak dgettext("po4a","Can't write to a file without filename")."\n";
dongsheng@623 361
dongsheng@623 362 my $fh;
dongsheng@623 363 if ($filename eq '-') {
dongsheng@623 364 $fh=\*STDOUT;
dongsheng@623 365 } else {
dongsheng@623 366 # make sure the directory in which we should write the localized
dongsheng@623 367 # file exists
dongsheng@623 368 my $dir = $filename;
dongsheng@623 369 if ($dir =~ m|/|) {
dongsheng@623 370 $dir =~ s|/[^/]*$||;
dongsheng@623 371
dongsheng@623 372 File::Path::mkpath($dir, 0, 0755) # Croaks on error
dongsheng@623 373 if (length ($dir) && ! -e $dir);
dongsheng@623 374 }
dongsheng@623 375 open $fh,">$filename"
dongsheng@623 376 or croak wrap_mod("po4a::po",
dongsheng@623 377 dgettext("po4a", "Can't write to %s: %s"),
dongsheng@623 378 $filename, $!);
dongsheng@623 379 }
dongsheng@623 380
dongsheng@623 381 print $fh "".format_comment($self->{header_comment},"")
dongsheng@623 382 if defined($self->{header_comment}) && length($self->{header_comment});
dongsheng@623 383
dongsheng@623 384 print $fh "msgid \"\"\n";
dongsheng@623 385 print $fh "msgstr ".quote_text($self->{header})."\n\n";
dongsheng@623 386
dongsheng@623 387
dongsheng@623 388 my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms
dongsheng@623 389 my $first=1;
dongsheng@623 390 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
dongsheng@623 391 ($self->{po}{"$b"}{'pos'})
dongsheng@623 392 } keys %{$self->{po}}) {
dongsheng@623 393 my $output="";
dongsheng@623 394
dongsheng@623 395 if ($first) {
dongsheng@623 396 $first=0;
dongsheng@623 397 } else {
dongsheng@623 398 $output .= "\n";
dongsheng@623 399 }
dongsheng@623 400
dongsheng@623 401 $output .= format_comment($self->{po}{$msgid}{'comment'},"")
dongsheng@623 402 if defined($self->{po}{$msgid}{'comment'})
dongsheng@623 403 && length ($self->{po}{$msgid}{'comment'});
dongsheng@623 404 if ( defined($self->{po}{$msgid}{'automatic'})
dongsheng@623 405 && length ($self->{po}{$msgid}{'automatic'})) {
dongsheng@623 406 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
dongsheng@623 407 {
dongsheng@623 408 $output .= format_comment($comment, ". ")
dongsheng@623 409 }
dongsheng@623 410 }
dongsheng@623 411 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
dongsheng@623 412 if defined($self->{po}{$msgid}{'type'})
dongsheng@623 413 && length ($self->{po}{$msgid}{'type'});
dongsheng@623 414 $output .= format_comment($self->{po}{$msgid}{'reference'},": ")
dongsheng@623 415 if defined($self->{po}{$msgid}{'reference'})
dongsheng@623 416 && length ($self->{po}{$msgid}{'reference'});
dongsheng@623 417 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
dongsheng@623 418 if defined($self->{po}{$msgid}{'flags'})
dongsheng@623 419 && length ($self->{po}{$msgid}{'flags'});
dongsheng@623 420
dongsheng@623 421 if (exists $self->{po}{$msgid}{'plural'}) {
dongsheng@623 422 if ($self->{po}{$msgid}{'plural'} == 0) {
dongsheng@623 423 if ($self->get_charset =~ /^utf-8$/i) {
dongsheng@623 424 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
dongsheng@623 425 $msgid = Encode::decode_utf8($msgid);
dongsheng@623 426 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
dongsheng@623 427 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
dongsheng@623 428 } else {
dongsheng@623 429 $output = "msgid ".quote_text($msgid)."\n";
dongsheng@623 430 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
dongsheng@623 431 }
dongsheng@623 432 } elsif ($self->{po}{$msgid}{'plural'} == 1) {
dongsheng@623 433 # TODO: there may be only one plural form
dongsheng@623 434 if ($self->get_charset =~ /^utf-8$/i) {
dongsheng@623 435 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
dongsheng@623 436 $msgid = Encode::decode_utf8($msgid);
dongsheng@623 437 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
dongsheng@623 438 $output .= $buf_msgstr_plural;
dongsheng@623 439 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
dongsheng@623 440 $buf_msgstr_plural = "";
dongsheng@623 441 } else {
dongsheng@623 442 $output = "msgid_plural ".quote_text($msgid)."\n";
dongsheng@623 443 $output .= $buf_msgstr_plural;
dongsheng@623 444 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
dongsheng@623 445 }
dongsheng@623 446 } else {
dongsheng@623 447 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
dongsheng@623 448 }
dongsheng@623 449 } else {
dongsheng@623 450 if ($self->get_charset =~ /^utf-8$/i) {
dongsheng@623 451 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
dongsheng@623 452 $msgid = Encode::decode_utf8($msgid);
dongsheng@623 453 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
dongsheng@623 454 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
dongsheng@623 455 } else {
dongsheng@623 456 $output .= "msgid ".quote_text($msgid)."\n";
dongsheng@623 457 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
dongsheng@623 458 }
dongsheng@623 459 }
dongsheng@623 460
dongsheng@623 461 print $fh $output;
dongsheng@623 462 }
dongsheng@623 463 # print STDERR "$fh";
dongsheng@623 464 # if ($filename ne '-') {
dongsheng@623 465 # close $fh
dongsheng@623 466 # or croak (sprintf(dgettext("po4a",
dongsheng@623 467 # "Can't close %s after writing: %s\n"),
dongsheng@623 468 # $filename,$!));
dongsheng@623 469 # }
dongsheng@623 470 }
dongsheng@623 471
dongsheng@623 472 =item write_if_needed($$)
dongsheng@623 473
dongsheng@623 474 Like write, but if the PO or POT file already exists, the object will be
dongsheng@623 475 written in a temporary file which will be compared with the existing file
dongsheng@623 476 to check that the update is needed (this avoids to change a POT just to
dongsheng@623 477 update a line reference or the POT-Creation-Date field).
dongsheng@623 478
dongsheng@623 479 =cut
dongsheng@623 480
dongsheng@623 481 sub move_po_if_needed {
dongsheng@623 482 my ($new_po, $old_po, $backup) = (shift, shift, shift);
dongsheng@623 483 my $diff;
dongsheng@623 484
dongsheng@623 485 if (-e $old_po) {
dongsheng@623 486 my $diff_ignore = "-I'^#:' "
dongsheng@623 487 ."-I'^\"POT-Creation-Date:' "
dongsheng@623 488 ."-I'^\"PO-Revision-Date:'";
dongsheng@623 489 $diff = qx(diff -q $diff_ignore $old_po $new_po);
dongsheng@623 490 if ( $diff eq "" ) {
dongsheng@623 491 unlink $new_po
dongsheng@623 492 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
dongsheng@623 493 $new_po, $!);
dongsheng@623 494 # touch the old PO
dongsheng@623 495 my ($atime, $mtime) = (time,time);
dongsheng@623 496 utime $atime, $mtime, $old_po;
dongsheng@623 497 } else {
dongsheng@623 498 if ($backup) {
dongsheng@623 499 copy $old_po, $old_po."~"
dongsheng@623 500 or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."),
dongsheng@623 501 $old_po, $old_po."~", $!);
dongsheng@623 502 } else {
dongsheng@623 503 }
dongsheng@623 504 move $new_po, $old_po
dongsheng@623 505 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
dongsheng@623 506 $new_po, $old_po, $!);
dongsheng@623 507 }
dongsheng@623 508 } else {
dongsheng@623 509 move $new_po, $old_po
dongsheng@623 510 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
dongsheng@623 511 $new_po, $old_po, $!);
dongsheng@623 512 }
dongsheng@623 513 }
dongsheng@623 514
dongsheng@623 515 sub write_if_needed {
dongsheng@623 516 my $self=shift;
dongsheng@623 517 my $filename=shift
dongsheng@623 518 or croak dgettext("po4a","Can't write to a file without filename")."\n";
dongsheng@623 519
dongsheng@623 520 if (-e $filename) {
dongsheng@623 521 my ($tmp_filename);
dongsheng@623 522 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",
dongsheng@623 523 DIR => "/tmp",
dongsheng@623 524 OPEN => 0,
dongsheng@623 525 UNLINK => 0);
dongsheng@623 526 $self->write($tmp_filename);
dongsheng@623 527 move_po_if_needed($tmp_filename, $filename);
dongsheng@623 528 } else {
dongsheng@623 529 $self->write($filename);
dongsheng@623 530 }
dongsheng@623 531 }
dongsheng@623 532
dongsheng@623 533 =item gettextize($$)
dongsheng@623 534
dongsheng@623 535 This function produces one translated message catalog from two catalogs, an
dongsheng@623 536 original and a translation. This process is described in L<po4a(7)|po4a.7>,
dongsheng@623 537 section I<Gettextization: how does it work?>.
dongsheng@623 538
dongsheng@623 539 =cut
dongsheng@623 540
dongsheng@623 541 sub gettextize {
dongsheng@623 542 my $this = shift;
dongsheng@623 543 my $class = ref($this) || $this;
dongsheng@623 544 my ($poorig,$potrans)=(shift,shift);
dongsheng@623 545
dongsheng@623 546 my $pores=Locale::Po4a::Po->new();
dongsheng@623 547
dongsheng@623 548 my $please_fail = 0;
dongsheng@623 549 my $toobad = dgettext("po4a",
dongsheng@623 550 "\nThe gettextization failed (once again). Don't give up, ".
dongsheng@623 551 "gettextizing is a subtle art, but this is only needed once ".
dongsheng@623 552 "to convert a project to the gorgeous luxus offered by po4a ".
dongsheng@623 553 "to translators.".
dongsheng@623 554 "\nPlease refer to the po4a(7) documentation, the section ".
dongsheng@623 555 "\"HOWTO convert a pre-existing translation to po4a?\" ".
dongsheng@623 556 "contains several hints to help you in your task");
dongsheng@623 557
dongsheng@623 558 # Don't fail right now when the entry count does not match. Instead, give
dongsheng@623 559 # it a try so that the user can see where we fail (which is probably where
dongsheng@623 560 # the problem is).
dongsheng@623 561 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
dongsheng@623 562 warn wrap_mod("po4a gettextize", dgettext("po4a",
dongsheng@623 563 "Original has more strings than the translation (%d>%d). ".
dongsheng@623 564 "Please fix it by editing the translated version to add ".
dongsheng@623 565 "some dummy entry."),
dongsheng@623 566 $poorig->count_entries_doc(),
dongsheng@623 567 $potrans->count_entries_doc());
dongsheng@623 568 $please_fail = 1;
dongsheng@623 569 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
dongsheng@623 570 warn wrap_mod("po4a gettextize", dgettext("po4a",
dongsheng@623 571 "Original has less strings than the translation (%d<%d). ".
dongsheng@623 572 "Please fix it by removing the extra entry from the ".
dongsheng@623 573 "translated file. You may need an addendum (cf po4a(7)) ".
dongsheng@623 574 "to reput the chunk in place after gettextization. A ".
dongsheng@623 575 "possible cause is that a text duplicated in the original ".
dongsheng@623 576 "is not translated the same way each time. Remove one of ".
dongsheng@623 577 "the translations, and you're fine."),
dongsheng@623 578 $poorig->count_entries_doc(),
dongsheng@623 579 $potrans->count_entries_doc());
dongsheng@623 580 $please_fail = 1;
dongsheng@623 581 }
dongsheng@623 582
dongsheng@623 583 if ( $poorig->get_charset =~ /^utf-8$/i ) {
dongsheng@623 584 $potrans->to_utf8;
dongsheng@623 585 $pores->set_charset("utf-8");
dongsheng@623 586 } else {
dongsheng@623 587 if ($potrans->get_charset eq "CHARSET") {
dongsheng@623 588 $pores->set_charset("ascii");
dongsheng@623 589 } else {
dongsheng@623 590 $pores->set_charset($potrans->get_charset);
dongsheng@623 591 }
dongsheng@623 592 }
dongsheng@623 593 print "Po character sets:\n".
dongsheng@623 594 " original=".$poorig->get_charset."\n".
dongsheng@623 595 " translated=".$potrans->get_charset."\n".
dongsheng@623 596 " result=".$pores->get_charset."\n"
dongsheng@623 597 if $debug{'encoding'};
dongsheng@623 598
dongsheng@623 599 for (my ($o,$t)=(0,0) ;
dongsheng@623 600 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
dongsheng@623 601 $o++,$t++) {
dongsheng@623 602 #
dongsheng@623 603 # Extract some informations
dongsheng@623 604
dongsheng@623 605 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
dongsheng@623 606 # print STDERR "Matches [[$orig]]<<$trans>>\n";
dongsheng@623 607
dongsheng@623 608 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
dongsheng@623 609 $potrans->{po}{$trans}{'reference'});
dongsheng@623 610 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
dongsheng@623 611 $potrans->{po}{$trans}{'type'});
dongsheng@623 612
dongsheng@623 613 #
dongsheng@623 614 # Make sure the type of both string exist
dongsheng@623 615 #
dongsheng@623 616 die wrap_mod("po4a gettextize",
dongsheng@623 617 "Internal error: type of original string number %s ".
dongsheng@623 618 "isn't provided", $o)
dongsheng@623 619 if ($typeorig eq '');
dongsheng@623 620
dongsheng@623 621 die wrap_mod("po4a gettextize",
dongsheng@623 622 "Internal error: type of translated string number %s ".
dongsheng@623 623 "isn't provided", $o)
dongsheng@623 624 if ($typetrans eq '');
dongsheng@623 625
dongsheng@623 626 #
dongsheng@623 627 # Make sure both type are the same
dongsheng@623 628 #
dongsheng@623 629 if ($typeorig ne $typetrans){
dongsheng@623 630 $pores->write("gettextization.failed.po");
dongsheng@623 631 die wrap_msg(dgettext("po4a",
dongsheng@623 632 "po4a gettextization: Structure disparity between ".
dongsheng@623 633 "original and translated files:\n".
dongsheng@623 634 "msgid (at %s) is of type '%s' while\n".
dongsheng@623 635 "msgstr (at %s) is of type '%s'.\n".
dongsheng@623 636 "Original text: %s\n".
dongsheng@623 637 "Translated text: %s\n".
dongsheng@623 638 "(result so far dumped to gettextization.failed.po)").
dongsheng@623 639 "%s",
dongsheng@623 640 $reforig, $typeorig,
dongsheng@623 641 $reftrans, $typetrans,
dongsheng@623 642 $orig,
dongsheng@623 643 $trans,
dongsheng@623 644 $toobad);
dongsheng@623 645 }
dongsheng@623 646
dongsheng@623 647 #
dongsheng@623 648 # Push the entry
dongsheng@623 649 #
dongsheng@623 650 my $flags;
dongsheng@623 651 if (defined $poorig->{po}{$orig}{'flags'}) {
dongsheng@623 652 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
dongsheng@623 653 } else {
dongsheng@623 654 $flags = "fuzzy";
dongsheng@623 655 }
dongsheng@623 656 $pores->push_raw('msgid' => $orig,
dongsheng@623 657 'msgstr' => $trans,
dongsheng@623 658 'flags' => $flags,
dongsheng@623 659 'type' => $typeorig,
dongsheng@623 660 'reference' => $reforig,
dongsheng@623 661 'conflict' => 1,
dongsheng@623 662 'transref' => $potrans->{po}{$trans}{'reference'})
dongsheng@623 663 unless (defined($pores->{po}{$orig})
dongsheng@623 664 and ($pores->{po}{$orig}{'msgstr'} eq $trans))
dongsheng@623 665 # FIXME: maybe we should be smarter about what reference should be
dongsheng@623 666 # sent to push_raw.
dongsheng@623 667 }
dongsheng@623 668
dongsheng@623 669 # make sure we return a useful error message when entry count differ
dongsheng@623 670 die "$toobad\n" if $please_fail;
dongsheng@623 671
dongsheng@623 672 return $pores;
dongsheng@623 673 }
dongsheng@623 674
dongsheng@623 675 =item filter($)
dongsheng@623 676
dongsheng@623 677 This function extracts a catalog from an existing one. Only the entries having
dongsheng@623 678 a reference in the given file will be placed in the resulting catalog.
dongsheng@623 679
dongsheng@623 680 This function parses its argument, converts it to a perl function definition,
dongsheng@623 681 eval this definition and filter the fields for which this function returns
dongsheng@623 682 true.
dongsheng@623 683
dongsheng@623 684 I love perl sometimes ;)
dongsheng@623 685
dongsheng@623 686 =cut
dongsheng@623 687
dongsheng@623 688 sub filter {
dongsheng@623 689 my $self=shift;
dongsheng@623 690 our $filter=shift;
dongsheng@623 691
dongsheng@623 692 my $res;
dongsheng@623 693 $res = Locale::Po4a::Po->new();
dongsheng@623 694
dongsheng@623 695 # Parse the filter
dongsheng@623 696 our $code="sub apply { return ";
dongsheng@623 697 our $pos=0;
dongsheng@623 698 our $length = length $filter;
dongsheng@623 699
dongsheng@623 700 # explode chars to parts. How to subscript a string in Perl?
dongsheng@623 701 our @filter = split(//,$filter);
dongsheng@623 702
dongsheng@623 703 sub gloups {
dongsheng@623 704 my $fmt=shift;
dongsheng@623 705 my $space = "";
dongsheng@623 706 for (1..$pos){
dongsheng@623 707 $space .= ' ';
dongsheng@623 708 }
dongsheng@623 709 die wrap_msg("$fmt\n$filter\n$space^ HERE");
dongsheng@623 710 }
dongsheng@623 711 sub showmethecode {
dongsheng@623 712 return unless $debug{'filter'};
dongsheng@623 713 my $fmt=shift;
dongsheng@623 714 my $space="";
dongsheng@623 715 for (1..$pos){
dongsheng@623 716 $space .= ' ';
dongsheng@623 717 }
dongsheng@623 718 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
dongsheng@623 719 }
dongsheng@623 720
dongsheng@623 721 # I dream of a lex in perl :-/
dongsheng@623 722 sub parse_expression {
dongsheng@623 723 showmethecode("Begin expression")
dongsheng@623 724 if $debug{'filter'};
dongsheng@623 725
dongsheng@623 726 gloups("Begin of expression expected, got '%s'",$filter[$pos])
dongsheng@623 727 unless ($filter[$pos] eq '(');
dongsheng@623 728 $pos ++; # pass the '('
dongsheng@623 729 if ($filter[$pos] eq '&') {
dongsheng@623 730 # AND
dongsheng@623 731 $pos++;
dongsheng@623 732 showmethecode("Begin of AND")
dongsheng@623 733 if $debug{'filter'};
dongsheng@623 734 $code .= "(";
dongsheng@623 735 while (1) {
dongsheng@623 736 gloups ("Unfinished AND statement.")
dongsheng@623 737 if ($pos == $length);
dongsheng@623 738 parse_expression();
dongsheng@623 739 if ($filter[$pos] eq '(') {
dongsheng@623 740 $code .= " && ";
dongsheng@623 741 } elsif ($filter[$pos] eq ')') {
dongsheng@623 742 last; # do not eat that char
dongsheng@623 743 } else {
dongsheng@623 744 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
dongsheng@623 745 }
dongsheng@623 746 }
dongsheng@623 747 $code .= ")";
dongsheng@623 748 } elsif ($filter[$pos] eq '|') {
dongsheng@623 749 # OR
dongsheng@623 750 $pos++;
dongsheng@623 751 $code .= "(";
dongsheng@623 752 while (1) {
dongsheng@623 753 gloups("Unfinished OR statement.")
dongsheng@623 754 if ($pos == $length);
dongsheng@623 755 parse_expression();
dongsheng@623 756 if ($filter[$pos] eq '(') {
dongsheng@623 757 $code .= " || ";
dongsheng@623 758 } elsif ($filter[$pos] eq ')') {
dongsheng@623 759 last; # do not eat that char
dongsheng@623 760 } else {
dongsheng@623 761 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
dongsheng@623 762 }
dongsheng@623 763 }
dongsheng@623 764 $code .= ")";
dongsheng@623 765 } elsif ($filter[$pos] eq '!') {
dongsheng@623 766 # NOT
dongsheng@623 767 $pos++;
dongsheng@623 768 $code .= "(!";
dongsheng@623 769 gloups("Missing sub-expression in NOT statement.")
dongsheng@623 770 if ($pos == $length);
dongsheng@623 771 parse_expression();
dongsheng@623 772 $code .= ")";
dongsheng@623 773 } else {
dongsheng@623 774 # must be an equal. Let's get field and argument
dongsheng@623 775 my ($field,$arg,$done);
dongsheng@623 776 $field = substr($filter,$pos);
dongsheng@623 777 gloups("EQ statement contains no '=' or invalid field name")
dongsheng@623 778 unless ($field =~ /([a-z]*)=/i);
dongsheng@623 779 $field = lc($1);
dongsheng@623 780 $pos += (length $field) + 1;
dongsheng@623 781
dongsheng@623 782 # check that we've got a valid field name,
dongsheng@623 783 # and the number it referes to
dongsheng@623 784 # DO NOT CHANGE THE ORDER
dongsheng@623 785 my @names=qw(msgid msgstr reference flags comment automatic);
dongsheng@623 786 my $fieldpos;
dongsheng@623 787 for ($fieldpos = 0;
dongsheng@623 788 $fieldpos < scalar @names && $field ne $names[$fieldpos];
dongsheng@623 789 $fieldpos++) {}
dongsheng@623 790 gloups("Invalid field name: %s",$field)
dongsheng@623 791 if $fieldpos == scalar @names; # not found
dongsheng@623 792
dongsheng@623 793 # Now, get the argument value. It has to be between quotes,
dongsheng@623 794 # which can be escaped
dongsheng@623 795 # We point right on the first char of the argument
dongsheng@623 796 # (first quote already eaten)
dongsheng@623 797 my $escaped = 0;
dongsheng@623 798 my $quoted = 0;
dongsheng@623 799 if ($filter[$pos] eq '"') {
dongsheng@623 800 $pos++;
dongsheng@623 801 $quoted = 1;
dongsheng@623 802 }
dongsheng@623 803 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
dongsheng@623 804 if $debug{'filter'};
dongsheng@623 805
dongsheng@623 806 while (!$done) {
dongsheng@623 807 gloups("Unfinished EQ argument.")
dongsheng@623 808 if ($pos == $length);
dongsheng@623 809
dongsheng@623 810 if ($quoted) {
dongsheng@623 811 if ($filter[$pos] eq '\\') {
dongsheng@623 812 if ($escaped) {
dongsheng@623 813 $arg .= '\\';
dongsheng@623 814 $escaped = 0;
dongsheng@623 815 } else {
dongsheng@623 816 $escaped = 1;
dongsheng@623 817 }
dongsheng@623 818 } elsif ($escaped) {
dongsheng@623 819 if ($filter[$pos] eq '"') {
dongsheng@623 820 $arg .= '"';
dongsheng@623 821 $escaped = 0;
dongsheng@623 822 } else {
dongsheng@623 823 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
dongsheng@623 824 }
dongsheng@623 825 } else {
dongsheng@623 826 if ($filter[$pos] eq '"') {
dongsheng@623 827 $done = 1;
dongsheng@623 828 } else {
dongsheng@623 829 $arg .= $filter[$pos];
dongsheng@623 830 }
dongsheng@623 831 }
dongsheng@623 832 } else {
dongsheng@623 833 if ($filter[$pos] eq ')') {
dongsheng@623 834 # counter the next ++ since we don't want to eat
dongsheng@623 835 # this char
dongsheng@623 836 $pos--;
dongsheng@623 837 $done = 1;
dongsheng@623 838 } else {
dongsheng@623 839 $arg .= $filter[$pos];
dongsheng@623 840 }
dongsheng@623 841 }
dongsheng@623 842 $pos++;
dongsheng@623 843 }
dongsheng@623 844 # and now, add the code to check this equality
dongsheng@623 845 $code .= "(\$_[$fieldpos] =~ m/$arg/)";
dongsheng@623 846
dongsheng@623 847 }
dongsheng@623 848 showmethecode("End of expression")
dongsheng@623 849 if $debug{'filter'};
dongsheng@623 850 gloups("Unfinished statement.")
dongsheng@623 851 if ($pos == $length);
dongsheng@623 852 gloups("End of expression expected, got '%s'",$filter[$pos])
dongsheng@623 853 unless ($filter[$pos] eq ')');
dongsheng@623 854 $pos++;
dongsheng@623 855 }
dongsheng@623 856 # And now, launch the beast, finish the function and use eval
dongsheng@623 857 # to construct this function.
dongsheng@623 858 # Ok, the lack of lexer is a fair price for the eval ;)
dongsheng@623 859 parse_expression();
dongsheng@623 860 gloups("Garbage at the end of the expression")
dongsheng@623 861 if ($pos != $length);
dongsheng@623 862 $code .= "; }";
dongsheng@623 863 print STDERR "CODE = $code\n"
dongsheng@623 864 if $debug{'filter'};
dongsheng@623 865 eval $code;
dongsheng@623 866 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
dongsheng@623 867 if $@;
dongsheng@623 868
dongsheng@623 869 for (my $cpt=(0) ;
dongsheng@623 870 $cpt<$self->count_entries();
dongsheng@623 871 $cpt++) {
dongsheng@623 872
dongsheng@623 873 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic);
dongsheng@623 874
dongsheng@623 875 $msgid = $self->msgid($cpt);
dongsheng@623 876 $ref=$self->{po}{$msgid}{'reference'};
dongsheng@623 877
dongsheng@623 878 $msgstr= $self->{po}{$msgid}{'msgstr'};
dongsheng@623 879 $flags = $self->{po}{$msgid}{'flags'};
dongsheng@623 880 $type = $self->{po}{$msgid}{'type'};
dongsheng@623 881 $comment = $self->{po}{$msgid}{'comment'};
dongsheng@623 882 $automatic = $self->{po}{$msgid}{'automatic'};
dongsheng@623 883
dongsheng@623 884 # DO NOT CHANGE THE ORDER
dongsheng@623 885 $res->push_raw('msgid' => $msgid,
dongsheng@623 886 'msgstr' => $msgstr,
dongsheng@623 887 'flags' => $flags,
dongsheng@623 888 'type' => $type,
dongsheng@623 889 'reference' => $ref,
dongsheng@623 890 'comment' => $comment,
dongsheng@623 891 'automatic' => $automatic)
dongsheng@623 892 if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic));
dongsheng@623 893 }
dongsheng@623 894 # delete the apply subroutine
dongsheng@623 895 # otherwise it will be redefined.
dongsheng@623 896 undef &apply;
dongsheng@623 897 return $res;
dongsheng@623 898 }
dongsheng@623 899
dongsheng@623 900 =item to_utf8()
dongsheng@623 901
dongsheng@623 902 Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not
dongsheng@623 903 specified in the po file ("CHARSET" value), or if it's already utf-8 or
dongsheng@623 904 ascii.
dongsheng@623 905
dongsheng@623 906 =cut
dongsheng@623 907
dongsheng@623 908 sub to_utf8 {
dongsheng@623 909 my $this = shift;
dongsheng@623 910 my $charset = $this->get_charset();
dongsheng@623 911
dongsheng@623 912 unless ($charset eq "CHARSET" or
dongsheng@623 913 $charset =~ /^ascii$/i or
dongsheng@623 914 $charset =~ /^utf-8$/i) {
dongsheng@623 915 foreach my $msgid ( keys %{$this->{po}} ) {
dongsheng@623 916 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
dongsheng@623 917 }
dongsheng@623 918 $this->set_charset("utf-8");
dongsheng@623 919 }
dongsheng@623 920 }
dongsheng@623 921
dongsheng@623 922 =back
dongsheng@623 923
dongsheng@623 924 =head1 Functions to use a message catalog for translations
dongsheng@623 925
dongsheng@623 926 =over 4
dongsheng@623 927
dongsheng@623 928 =item gettext($%)
dongsheng@623 929
dongsheng@623 930 Request the translation of the string given as argument in the current catalog.
dongsheng@623 931 The function returns the original (untranslated) string if the string was not
dongsheng@623 932 found.
dongsheng@623 933
dongsheng@623 934 After the string to translate, you can pass a hash of extra
dongsheng@623 935 arguments. Here are the valid entries:
dongsheng@623 936
dongsheng@623 937 =over
dongsheng@623 938
dongsheng@623 939 =item wrap
dongsheng@623 940
dongsheng@623 941 boolean indicating whether we can consider that whitespaces in string are
dongsheng@623 942 not important. If yes, the function canonizes the string before looking for
dongsheng@623 943 a translation, and wraps the result.
dongsheng@623 944
dongsheng@623 945 =item wrapcol
dongsheng@623 946
dongsheng@623 947 The column at which we should wrap (default: 76).
dongsheng@623 948
dongsheng@623 949 =back
dongsheng@623 950
dongsheng@623 951 =cut
dongsheng@623 952
dongsheng@623 953 sub gettext {
dongsheng@623 954 my $self=shift;
dongsheng@623 955 my $text=shift;
dongsheng@623 956 my (%opt)=@_;
dongsheng@623 957 my $res;
dongsheng@623 958
dongsheng@623 959 return "" unless defined($text) && length($text); # Avoid returning the header.
dongsheng@623 960 my $validoption="reference wrap wrapcol";
dongsheng@623 961 my %validoption;
dongsheng@623 962
dongsheng@623 963 map { $validoption{$_}=1 } (split(/ /,$validoption));
dongsheng@623 964 foreach (keys %opt) {
dongsheng@623 965 Carp::confess "internal error: unknown arg $_.\n".
dongsheng@623 966 "Here are the valid options: $validoption.\n"
dongsheng@623 967 unless $validoption{$_};
dongsheng@623 968 }
dongsheng@623 969
dongsheng@623 970 $text=canonize($text)
dongsheng@623 971 if ($opt{'wrap'});
dongsheng@623 972
dongsheng@623 973 my $esc_text=escape_text($text);
dongsheng@623 974
dongsheng@623 975 $self->{gettextqueries}++;
dongsheng@623 976
dongsheng@623 977 if ( defined $self->{po}{$esc_text}
dongsheng@623 978 and defined $self->{po}{$esc_text}{'msgstr'}
dongsheng@623 979 and length $self->{po}{$esc_text}{'msgstr'}
dongsheng@623 980 and ( not defined $self->{po}{$esc_text}{'flags'}
dongsheng@623 981 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
dongsheng@623 982
dongsheng@623 983 $self->{gettexthits}++;
dongsheng@623 984 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
dongsheng@623 985 if (defined $self->{po}{$esc_text}{'plural'}) {
dongsheng@623 986 if ($self->{po}{$esc_text}{'plural'} eq "0") {
dongsheng@623 987 warn wrap_mod("po4a gettextize", dgettext("po4a",
dongsheng@623 988 "'%s' is the singular form of a message, ".
dongsheng@623 989 "po4a will use the msgstr[0] translation (%s)."),
dongsheng@623 990 $esc_text, $res);
dongsheng@623 991 } else {
dongsheng@623 992 warn wrap_mod("po4a gettextize", dgettext("po4a",
dongsheng@623 993 "'%s' is the plural form of a message, ".
dongsheng@623 994 "po4a will use the msgstr[1] translation (%s)."),
dongsheng@623 995 $esc_text, $res);
dongsheng@623 996 }
dongsheng@623 997 }
dongsheng@623 998 } else {
dongsheng@623 999 $res = $text;
dongsheng@623 1000 }
dongsheng@623 1001
dongsheng@623 1002 if ($opt{'wrap'}) {
dongsheng@623 1003 if ($self->get_charset =~ /^utf-8$/i) {
dongsheng@623 1004 $res=Encode::decode_utf8($res);
dongsheng@623 1005 $res=wrap ($res, $opt{'wrapcol'} || 76);
dongsheng@623 1006 $res=Encode::encode_utf8($res);
dongsheng@623 1007 } else {
dongsheng@623 1008 $res=wrap ($res, $opt{'wrapcol'} || 76);
dongsheng@623 1009 }
dongsheng@623 1010 }
dongsheng@623 1011 # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
dongsheng@623 1012 return $res;
dongsheng@623 1013 }
dongsheng@623 1014
dongsheng@623 1015 =item stats_get()
dongsheng@623 1016
dongsheng@623 1017 Returns statistics about the hit ratio of gettext since the last time that
dongsheng@623 1018 stats_clear() was called. Please note that it's not the same
dongsheng@623 1019 statistics than the one printed by msgfmt --statistic. Here, it's statistics
dongsheng@623 1020 about recent usage of the po file, while msgfmt reports the status of the
dongsheng@623 1021 file. Example of use:
dongsheng@623 1022
dongsheng@623 1023 [some use of the po file to translate stuff]
dongsheng@623 1024
dongsheng@623 1025 ($percent,$hit,$queries) = $pofile->stats_get();
dongsheng@623 1026 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
dongsheng@623 1027
dongsheng@623 1028 =cut
dongsheng@623 1029
dongsheng@623 1030 sub stats_get() {
dongsheng@623 1031 my $self=shift;
dongsheng@623 1032 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
dongsheng@623 1033 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
dongsheng@623 1034
dongsheng@623 1035 # $p =~ s/\.00//;
dongsheng@623 1036 # $p =~ s/(\..)0/$1/;
dongsheng@623 1037
dongsheng@623 1038 return ( $p,$h,$q );
dongsheng@623 1039 }
dongsheng@623 1040
dongsheng@623 1041 =item stats_clear()
dongsheng@623 1042
dongsheng@623 1043 Clears the statistics about gettext hits.
dongsheng@623 1044
dongsheng@623 1045 =cut
dongsheng@623 1046
dongsheng@623 1047 sub stats_clear {
dongsheng@623 1048 my $self = shift;
dongsheng@623 1049 $self->{gettextqueries} = 0;
dongsheng@623 1050 $self->{gettexthits} = 0;
dongsheng@623 1051 }
dongsheng@623 1052
dongsheng@623 1053 =back
dongsheng@623 1054
dongsheng@623 1055 =head1 Functions to build a message catalog
dongsheng@623 1056
dongsheng@623 1057 =over 4
dongsheng@623 1058
dongsheng@623 1059 =item push(%)
dongsheng@623 1060
dongsheng@623 1061 Push a new entry at the end of the current catalog. The arguments should
dongsheng@623 1062 form a hash table. The valid keys are:
dongsheng@623 1063
dongsheng@623 1064 =over 4
dongsheng@623 1065
dongsheng@623 1066 =item msgid
dongsheng@623 1067
dongsheng@623 1068 the string in original language.
dongsheng@623 1069
dongsheng@623 1070 =item msgstr
dongsheng@623 1071
dongsheng@623 1072 the translation.
dongsheng@623 1073
dongsheng@623 1074 =item reference
dongsheng@623 1075
dongsheng@623 1076 an indication of where this string was found. Example: file.c:46 (meaning
dongsheng@623 1077 in 'file.c' at line 46). It can be a space-separated list in case of
dongsheng@623 1078 multiple occurrences.
dongsheng@623 1079
dongsheng@623 1080 =item comment
dongsheng@623 1081
dongsheng@623 1082 a comment added here manually (by the translators). The format here is free.
dongsheng@623 1083
dongsheng@623 1084 =item automatic
dongsheng@623 1085
dongsheng@623 1086 a comment which was automatically added by the string extraction
dongsheng@623 1087 program. See the I<--add-comments> option of the B<xgettext> program for
dongsheng@623 1088 more information.
dongsheng@623 1089
dongsheng@623 1090 =item flags
dongsheng@623 1091
dongsheng@623 1092 space-separated list of all defined flags for this entry.
dongsheng@623 1093
dongsheng@623 1094 Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text,
dongsheng@623 1095 smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text,
dongsheng@623 1096 tcl-text, wrap, no-wrap and fuzzy.
dongsheng@623 1097
dongsheng@623 1098 See the gettext documentation for their meaning.
dongsheng@623 1099
dongsheng@623 1100 =item type
dongsheng@623 1101
dongsheng@623 1102 This is mostly an internal argument: it is used while gettextizing
dongsheng@623 1103 documents. The idea here is to parse both the original and the translation
dongsheng@623 1104 into a po object, and merge them, using one's msgid as msgid and the
dongsheng@623 1105 other's msgid as msgstr. To make sure that things get ok, each msgid in po
dongsheng@623 1106 objects are given a type, based on their structure (like "chapt", "sect1",
dongsheng@623 1107 "p" and so on in docbook). If the types of strings are not the same, that
dongsheng@623 1108 means that both files do not share the same structure, and the process
dongsheng@623 1109 reports an error.
dongsheng@623 1110
dongsheng@623 1111 This information is written as automatic comment in the po file since this
dongsheng@623 1112 gives to translators some context about the strings to translate.
dongsheng@623 1113
dongsheng@623 1114 =item wrap
dongsheng@623 1115
dongsheng@623 1116 boolean indicating whether whitespaces can be mangled in cosmetic
dongsheng@623 1117 reformattings. If true, the string is canonized before use.
dongsheng@623 1118
dongsheng@623 1119 This information is written to the po file using the 'wrap' or 'no-wrap' flag.
dongsheng@623 1120
dongsheng@623 1121 =item wrapcol
dongsheng@623 1122
dongsheng@623 1123 The column at which we should wrap (default: 76).
dongsheng@623 1124
dongsheng@623 1125 This information is not written to the po file.
dongsheng@623 1126
dongsheng@623 1127 =back
dongsheng@623 1128
dongsheng@623 1129 =cut
dongsheng@623 1130
dongsheng@623 1131 sub push {
dongsheng@623 1132 my $self=shift;
dongsheng@623 1133 my %entry=@_;
dongsheng@623 1134
dongsheng@623 1135 my $validoption="wrap wrapcol type msgid msgstr automatic flags reference";
dongsheng@623 1136 my %validoption;
dongsheng@623 1137
dongsheng@623 1138 map { $validoption{$_}=1 } (split(/ /,$validoption));
dongsheng@623 1139 foreach (keys %entry) {
dongsheng@623 1140 Carp::confess "internal error: unknown arg $_.\n".
dongsheng@623 1141 "Here are the valid options: $validoption.\n"
dongsheng@623 1142 unless $validoption{$_};
dongsheng@623 1143 }
dongsheng@623 1144
dongsheng@623 1145 unless ($entry{'wrap'}) {
dongsheng@623 1146 $entry{'flags'} .= " no-wrap";
dongsheng@623 1147 }
dongsheng@623 1148 if (defined ($entry{'msgid'})) {
dongsheng@623 1149 $entry{'msgid'} = canonize($entry{'msgid'})
dongsheng@623 1150 if ($entry{'wrap'});
dongsheng@623 1151
dongsheng@623 1152 $entry{'msgid'} = escape_text($entry{'msgid'});
dongsheng@623 1153 }
dongsheng@623 1154 if (defined ($entry{'msgstr'})) {
dongsheng@623 1155 $entry{'msgstr'} = canonize($entry{'msgstr'})
dongsheng@623 1156 if ($entry{'wrap'});
dongsheng@623 1157
dongsheng@623 1158 $entry{'msgstr'} = escape_text($entry{'msgstr'});
dongsheng@623 1159 }
dongsheng@623 1160
dongsheng@623 1161 $self->push_raw(%entry);
dongsheng@623 1162 }
dongsheng@623 1163
dongsheng@623 1164 # The same as push(), but assuming that msgid and msgstr are already escaped
dongsheng@623 1165 sub push_raw {
dongsheng@623 1166 my $self=shift;
dongsheng@623 1167 my %entry=@_;
dongsheng@623 1168 my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)=
dongsheng@623 1169 ($entry{'msgid'},$entry{'msgstr'},
dongsheng@623 1170 $entry{'reference'},$entry{'comment'},$entry{'automatic'},
dongsheng@623 1171 $entry{'flags'},$entry{'type'},$entry{'transref'});
dongsheng@623 1172 my $keep_conflict = $entry{'conflict'};
dongsheng@623 1173
dongsheng@623 1174 # print STDERR "Push_raw\n";
dongsheng@623 1175 # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
dongsheng@623 1176 # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
dongsheng@623 1177 # Carp::cluck " flags=$flags\n" if $flags;
dongsheng@623 1178
dongsheng@623 1179 return unless defined($entry{'msgid'});
dongsheng@623 1180
dongsheng@623 1181 #no msgid => header definition
dongsheng@623 1182 unless (length($entry{'msgid'})) {
dongsheng@623 1183 # if (defined($self->{header}) && $self->{header} =~ /\S/) {
dongsheng@623 1184 # warn dgettext("po4a","Redefinition of the header. ".
dongsheng@623 1185 # "The old one will be discarded\n");
dongsheng@623 1186 # } FIXME: do that iff the header isn't the default one.
dongsheng@623 1187 $self->{header}=$msgstr;
dongsheng@623 1188 $self->{header_comment}=$comment;
dongsheng@623 1189 my $charset = $self->get_charset;
dongsheng@623 1190 if ($charset ne "CHARSET") {
dongsheng@623 1191 $self->{encoder}=find_encoding($charset);
dongsheng@623 1192 } else {
dongsheng@623 1193 $self->{encoder}=find_encoding("ascii");
dongsheng@623 1194 }
dongsheng@623 1195 return;
dongsheng@623 1196 }
dongsheng@623 1197
dongsheng@623 1198 if ($self->{options}{'porefs'} eq "none") {
dongsheng@623 1199 $reference = "";
dongsheng@623 1200 } elsif ($self->{options}{'porefs'} eq "noline") {
dongsheng@623 1201 $reference =~ s/:[0-9]*/:1/g;
dongsheng@623 1202 }
dongsheng@623 1203
dongsheng@623 1204 if (defined($self->{po}{$msgid})) {
dongsheng@623 1205 warn wrap_mod("po4a::po",
dongsheng@623 1206 dgettext("po4a","msgid defined twice: %s"),
dongsheng@623 1207 $msgid)
dongsheng@623 1208 if (0); # FIXME: put a verbose stuff
dongsheng@623 1209 if ( defined $msgstr
dongsheng@623 1210 and defined $self->{po}{$msgid}{'msgstr'}
dongsheng@623 1211 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
dongsheng@623 1212 my $txt=quote_text($msgid);
dongsheng@623 1213 my ($first,$second)=
dongsheng@623 1214 (format_comment(". ",$self->{po}{$msgid}{'reference'}).
dongsheng@623 1215 quote_text($self->{po}{$msgid}{'msgstr'}),
dongsheng@623 1216
dongsheng@623 1217 format_comment(". ",$reference).
dongsheng@623 1218 quote_text($msgstr));
dongsheng@623 1219
dongsheng@623 1220 if ($keep_conflict) {
dongsheng@623 1221 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
dongsheng@623 1222 $msgstr = $self->{po}{$msgid}{'msgstr'}.
dongsheng@623 1223 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
dongsheng@623 1224 $msgstr;
dongsheng@623 1225 } else {
dongsheng@623 1226 $msgstr = "#-#-#-#-# ".
dongsheng@623 1227 $self->{po}{$msgid}{'transref'}.
dongsheng@623 1228 " #-#-#-#-#\\n".
dongsheng@623 1229 $self->{po}{$msgid}{'msgstr'}."\\n".
dongsheng@623 1230 "#-#-#-#-# $transref #-#-#-#-#\\n".
dongsheng@623 1231 $msgstr;
dongsheng@623 1232 }
dongsheng@623 1233 # Every msgid will have the same list of references.
dongsheng@623 1234 # Only keep the last list.
dongsheng@623 1235 $self->{po}{$msgid}{'reference'} = "";
dongsheng@623 1236 } else {
dongsheng@623 1237 warn wrap_msg(dgettext("po4a",
dongsheng@623 1238 "Translations don't match for:\n".
dongsheng@623 1239 "%s\n".
dongsheng@623 1240 "-->First translation:\n".
dongsheng@623 1241 "%s\n".
dongsheng@623 1242 " Second translation:\n".
dongsheng@623 1243 "%s\n".
dongsheng@623 1244 " Old translation discarded."),
dongsheng@623 1245 $txt,$first,$second);
dongsheng@623 1246 }
dongsheng@623 1247 }
dongsheng@623 1248 }
dongsheng@623 1249 if (defined $transref) {
dongsheng@623 1250 $self->{po}{$msgid}{'transref'} = $transref;
dongsheng@623 1251 }
dongsheng@623 1252 if (defined $reference) {
dongsheng@623 1253 if (defined $self->{po}{$msgid}{'reference'}) {
dongsheng@623 1254 $self->{po}{$msgid}{'reference'} .= " ".$reference;
dongsheng@623 1255 } else {
dongsheng@623 1256 $self->{po}{$msgid}{'reference'} = $reference;
dongsheng@623 1257 }
dongsheng@623 1258 }
dongsheng@623 1259 $self->{po}{$msgid}{'msgstr'} = $msgstr;
dongsheng@623 1260 $self->{po}{$msgid}{'comment'} = $comment;
dongsheng@623 1261 $self->{po}{$msgid}{'automatic'} = $automatic;
dongsheng@623 1262 if (defined($self->{po}{$msgid}{'pos_doc'})) {
dongsheng@623 1263 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
dongsheng@623 1264 } else {
dongsheng@623 1265 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
dongsheng@623 1266 }
dongsheng@623 1267 unless (defined($self->{po}{$msgid}{'pos'})) {
dongsheng@623 1268 $self->{po}{$msgid}{'pos'} = $self->{count}++;
dongsheng@623 1269 }
dongsheng@623 1270 $self->{po}{$msgid}{'type'} = $type;
dongsheng@623 1271 $self->{po}{$msgid}{'plural'} = $entry{'plural'}
dongsheng@623 1272 if defined $entry{'plural'};
dongsheng@623 1273
dongsheng@623 1274 if (defined($flags)) {
dongsheng@623 1275 $flags = " $flags ";
dongsheng@623 1276 $flags =~ s/,/ /g;
dongsheng@623 1277 foreach my $flag (@known_flags) {
dongsheng@623 1278 if ($flags =~ /\s$flag\s/) { # if flag to be set
dongsheng@623 1279 unless ( defined($self->{po}{$msgid}{'flags'})
dongsheng@623 1280 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
dongsheng@623 1281 # flag not already set
dongsheng@623 1282 if (defined $self->{po}{$msgid}{'flags'}) {
dongsheng@623 1283 $self->{po}{$msgid}{'flags'} .= " ".$flag;
dongsheng@623 1284 } else {
dongsheng@623 1285 $self->{po}{$msgid}{'flags'} = $flag;
dongsheng@623 1286 }
dongsheng@623 1287 }
dongsheng@623 1288 }
dongsheng@623 1289 }
dongsheng@623 1290 }
dongsheng@623 1291 # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
dongsheng@623 1292
dongsheng@623 1293 }
dongsheng@623 1294
dongsheng@623 1295 =back
dongsheng@623 1296
dongsheng@623 1297 =head1 Miscellaneous functions
dongsheng@623 1298
dongsheng@623 1299 =over 4
dongsheng@623 1300
dongsheng@623 1301 =item count_entries()
dongsheng@623 1302
dongsheng@623 1303 Returns the number of entries in the catalog (without the header).
dongsheng@623 1304
dongsheng@623 1305 =cut
dongsheng@623 1306
dongsheng@623 1307 sub count_entries($) {
dongsheng@623 1308 my $self=shift;
dongsheng@623 1309 return $self->{count};
dongsheng@623 1310 }
dongsheng@623 1311
dongsheng@623 1312 =item count_entries_doc()
dongsheng@623 1313
dongsheng@623 1314 Returns the number of entries in document. If a string appears multiple times
dongsheng@623 1315 in the document, it will be counted multiple times
dongsheng@623 1316
dongsheng@623 1317 =cut
dongsheng@623 1318
dongsheng@623 1319 sub count_entries_doc($) {
dongsheng@623 1320 my $self=shift;
dongsheng@623 1321 return $self->{count_doc};
dongsheng@623 1322 }
dongsheng@623 1323
dongsheng@623 1324 =item msgid($)
dongsheng@623 1325
dongsheng@623 1326 Returns the msgid of the given number.
dongsheng@623 1327
dongsheng@623 1328 =cut
dongsheng@623 1329
dongsheng@623 1330 sub msgid($$) {
dongsheng@623 1331 my $self=shift;
dongsheng@623 1332 my $num=shift;
dongsheng@623 1333
dongsheng@623 1334 foreach my $msgid ( keys %{$self->{po}} ) {
dongsheng@623 1335 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
dongsheng@623 1336 }
dongsheng@623 1337 return undef;
dongsheng@623 1338 }
dongsheng@623 1339
dongsheng@623 1340 =item msgid_doc($)
dongsheng@623 1341
dongsheng@623 1342 Returns the msgid with the given position in the document.
dongsheng@623 1343
dongsheng@623 1344 =cut
dongsheng@623 1345
dongsheng@623 1346 sub msgid_doc($$) {
dongsheng@623 1347 my $self=shift;
dongsheng@623 1348 my $num=shift;
dongsheng@623 1349
dongsheng@623 1350 foreach my $msgid ( keys %{$self->{po}} ) {
dongsheng@623 1351 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
dongsheng@623 1352 return $msgid if ($pos eq $num);
dongsheng@623 1353 }
dongsheng@623 1354 }
dongsheng@623 1355 return undef;
dongsheng@623 1356 }
dongsheng@623 1357
dongsheng@623 1358 =item get_charset()
dongsheng@623 1359
dongsheng@623 1360 Returns the character set specified in the po header. If it hasn't been
dongsheng@623 1361 set, it will return "CHARSET".
dongsheng@623 1362
dongsheng@623 1363 =cut
dongsheng@623 1364
dongsheng@623 1365 sub get_charset() {
dongsheng@623 1366 my $self=shift;
dongsheng@623 1367
dongsheng@623 1368 $self->{header} =~ /charset=(.*?)[\s\\]/;
dongsheng@623 1369
dongsheng@623 1370 if (defined $1) {
dongsheng@623 1371 return $1;
dongsheng@623 1372 } else {
dongsheng@623 1373 return "CHARSET";
dongsheng@623 1374 }
dongsheng@623 1375 }
dongsheng@623 1376
dongsheng@623 1377 =item set_charset($)
dongsheng@623 1378
dongsheng@623 1379 This sets the character set of the po header to the value specified in its
dongsheng@623 1380 first argument. If you never call this function (and no file with a specified
dongsheng@623 1381 character set is read), the default value is left to "CHARSET". This value
dongsheng@623 1382 doesn't change the behavior of this module, it's just used to fill that field
dongsheng@623 1383 in the header, and to return it in get_charset().
dongsheng@623 1384
dongsheng@623 1385 =cut
dongsheng@623 1386
dongsheng@623 1387 sub set_charset() {
dongsheng@623 1388 my $self=shift;
dongsheng@623 1389
dongsheng@623 1390 my ($newchar,$oldchar);
dongsheng@623 1391 $newchar = shift;
dongsheng@623 1392 $oldchar = $self->get_charset();
dongsheng@623 1393
dongsheng@623 1394 $self->{header} =~ s/$oldchar/$newchar/;
dongsheng@623 1395 $self->{encoder}=find_encoding($newchar);
dongsheng@623 1396 }
dongsheng@623 1397
dongsheng@623 1398 #----[ helper functions ]---------------------------------------------------
dongsheng@623 1399
dongsheng@623 1400 # transforme the string from its po file representation to the form which
dongsheng@623 1401 # should be used to print it
dongsheng@623 1402 sub unescape_text {
dongsheng@623 1403 my $text = shift;
dongsheng@623 1404
dongsheng@623 1405 print STDERR "\nunescape [$text]====" if $debug{'escape'};
dongsheng@623 1406 $text = join("",split(/\n/,$text));
dongsheng@623 1407 $text =~ s/\\"/"/g;
dongsheng@623 1408 # unescape newlines
dongsheng@623 1409 # NOTE on \G:
dongsheng@623 1410 # The following regular expression introduce newlines.
dongsheng@623 1411 # Thus, ^ doesn't match all beginnings of lines.
dongsheng@623 1412 # \G is a zero-width assertion that matches the position
dongsheng@623 1413 # of the previous substitution with s///g. As every
dongsheng@623 1414 # substitution ends by a newline, it always matches a
dongsheng@623 1415 # position just after a newline.
dongsheng@623 1416 $text =~ s/( # $1:
dongsheng@623 1417 (\G|[^\\]) # beginning of the line or any char
dongsheng@623 1418 # different from '\'
dongsheng@623 1419 (\\\\)* # followed by any even number of '\'
dongsheng@623 1420 )\\n # and followed by an escaped newline
dongsheng@623 1421 /$1\n/sgx; # single string, match globally, allow comments
dongsheng@623 1422 # unescape tabulations
dongsheng@623 1423 $text =~ s/( # $1:
dongsheng@623 1424 (\G|[^\\])# beginning of the line or any char
dongsheng@623 1425 # different from '\'
dongsheng@623 1426 (\\\\)* # followed by any even number of '\'
dongsheng@623 1427 )\\t # and followed by an escaped tabulation
dongsheng@623 1428 /$1\t/mgx; # multilines string, match globally, allow comments
dongsheng@623 1429 # and unescape the escape character
dongsheng@623 1430 $text =~ s/\\\\/\\/g;
dongsheng@623 1431 print STDERR ">$text<\n" if $debug{'escape'};
dongsheng@623 1432
dongsheng@623 1433 return $text;
dongsheng@623 1434 }
dongsheng@623 1435
dongsheng@623 1436 # transform the string to its representation as it should be written in po
dongsheng@623 1437 # files
dongsheng@623 1438 sub escape_text {
dongsheng@623 1439 my $text = shift;
dongsheng@623 1440
dongsheng@623 1441 print STDERR "\nescape [$text]====" if $debug{'escape'};
dongsheng@623 1442 $text =~ s/\\/\\\\/g;
dongsheng@623 1443 $text =~ s/"/\\"/g;
dongsheng@623 1444 $text =~ s/\n/\\n/g;
dongsheng@623 1445 $text =~ s/\t/\\t/g;
dongsheng@623 1446 print STDERR ">$text<\n" if $debug{'escape'};
dongsheng@623 1447
dongsheng@623 1448 return $text;
dongsheng@623 1449 }
dongsheng@623 1450
dongsheng@623 1451 # put quotes around the string on each lines (without escaping it)
dongsheng@623 1452 # It does also normalize the text (ie, make sure its representation is wraped
dongsheng@623 1453 # on the 80th char, but without changing the meaning of the string)
dongsheng@623 1454 sub quote_text {
dongsheng@623 1455 my $string = shift;
dongsheng@623 1456
dongsheng@623 1457 return '""' unless defined($string) && length($string);
dongsheng@623 1458
dongsheng@623 1459 print STDERR "\nquote [$string]====" if $debug{'quote'};
dongsheng@623 1460 # break lines on newlines, if any
dongsheng@623 1461 # see unescape_text for an explanation on \G
dongsheng@623 1462 $string =~ s/( # $1:
dongsheng@623 1463 (\G|[^\\]) # beginning of the line or any char
dongsheng@623 1464 # different from '\'
dongsheng@623 1465 (\\\\)* # followed by any even number of '\'
dongsheng@623 1466 \\n) # and followed by an escaped newline
dongsheng@623 1467 /$1\n/sgx; # single string, match globally, allow comments
dongsheng@623 1468 $string = wrap($string);
dongsheng@623 1469 my @string = split(/\n/,$string);
dongsheng@623 1470 $string = join ("\"\n\"",@string);
dongsheng@623 1471 $string = "\"$string\"";
dongsheng@623 1472 if (scalar @string > 1 && $string[0] ne '') {
dongsheng@623 1473 $string = "\"\"\n".$string;
dongsheng@623 1474 }
dongsheng@623 1475
dongsheng@623 1476 print STDERR ">$string<\n" if $debug{'quote'};
dongsheng@623 1477 return $string;
dongsheng@623 1478 }
dongsheng@623 1479
dongsheng@623 1480 # undo the work of the quote_text function
dongsheng@623 1481 sub unquote_text {
dongsheng@623 1482 my $string = shift;
dongsheng@623 1483 print STDERR "\nunquote [$string]====" if $debug{'quote'};
dongsheng@623 1484 $string =~ s/^""\\n//s;
dongsheng@623 1485 $string =~ s/^"(.*)"$/$1/s;
dongsheng@623 1486 $string =~ s/"\n"//gm;
dongsheng@623 1487 # Note: an even number of '\' could precede \\n, but I could not build a
dongsheng@623 1488 # document to test this
dongsheng@623 1489 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
dongsheng@623 1490 $string =~ s|!!DUMMYPOPM!!|\\n|gm;
dongsheng@623 1491 print STDERR ">$string<\n" if $debug{'quote'};
dongsheng@623 1492 return $string;
dongsheng@623 1493 }
dongsheng@623 1494
dongsheng@623 1495 # canonize the string: write it on only one line, changing consecutive
dongsheng@623 1496 # whitespace to only one space.
dongsheng@623 1497 # Warning, it changes the string and should only be called if the string is
dongsheng@623 1498 # plain text
dongsheng@623 1499 sub canonize {
dongsheng@623 1500 my $text=shift;
dongsheng@623 1501 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
dongsheng@623 1502 $text =~ s/^ *//s;
dongsheng@623 1503 $text =~ s/^[ \t]+/ /gm;
dongsheng@623 1504 # if ($text eq "\n"), it messed up the first string (header)
dongsheng@623 1505 $text =~ s/\n/ /gm if ($text ne "\n");
dongsheng@623 1506 $text =~ s/([.)]) +/$1 /gm;
dongsheng@623 1507 $text =~ s/([^.)]) */$1 /gm;
dongsheng@623 1508 $text =~ s/ *$//s;
dongsheng@623 1509 print STDERR ">$text<\n" if $debug{'canonize'};
dongsheng@623 1510 return $text;
dongsheng@623 1511 }
dongsheng@623 1512
dongsheng@623 1513 # wraps the string. We don't use Text::Wrap since it mangles whitespace at
dongsheng@623 1514 # the end of splited line
dongsheng@623 1515 sub wrap {
dongsheng@623 1516 my $text=shift;
dongsheng@623 1517 return "0" if ($text eq '0');
dongsheng@623 1518 my $col=shift || 76;
dongsheng@623 1519 my @lines=split(/\n/,"$text");
dongsheng@623 1520 my $res="";
dongsheng@623 1521 my $first=1;
dongsheng@623 1522 while (defined(my $line=shift @lines)) {
dongsheng@623 1523 if ($first && length($line) > $col - 10) {
dongsheng@623 1524 unshift @lines,$line;
dongsheng@623 1525 $first=0;
dongsheng@623 1526 next;
dongsheng@623 1527 }
dongsheng@623 1528 if (length($line) > $col) {
dongsheng@623 1529 my $pos=rindex($line," ",$col);
dongsheng@623 1530 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
dongsheng@623 1531 $pos=rindex($line," ",$pos-1);
dongsheng@623 1532 }
dongsheng@623 1533 if ($pos == -1) {
dongsheng@623 1534 # There are no spaces in the first $col chars, pick-up the
dongsheng@623 1535 # first space
dongsheng@623 1536 $pos = index($line," ");
dongsheng@623 1537 }
dongsheng@623 1538 if ($pos != -1) {
dongsheng@623 1539 my $end=substr($line,$pos+1);
dongsheng@623 1540 $line=substr($line,0,$pos+1);
dongsheng@623 1541 if ($end =~ s/^( +)//) {
dongsheng@623 1542 $line .= $1;
dongsheng@623 1543 }
dongsheng@623 1544 unshift @lines,$end;
dongsheng@623 1545 }
dongsheng@623 1546 }
dongsheng@623 1547 $first=0;
dongsheng@623 1548 $res.="$line\n";
dongsheng@623 1549 }
dongsheng@623 1550 # Restore the original trailing spaces
dongsheng@623 1551 $res =~ s/\s+$//s;
dongsheng@623 1552 if ($text =~ m/(\s+)$/s) {
dongsheng@623 1553 $res .= $1;
dongsheng@623 1554 }
dongsheng@623 1555 return $res;
dongsheng@623 1556 }
dongsheng@623 1557
dongsheng@623 1558 # outputs properly a '# ... ' line to be put in the po file
dongsheng@623 1559 sub format_comment {
dongsheng@623 1560 my $comment=shift;
dongsheng@623 1561 my $char=shift;
dongsheng@623 1562 my $result = "#". $char . $comment;
dongsheng@623 1563 $result =~ s/\n/\n#$char/gs;
dongsheng@623 1564 $result =~ s/^#$char$/#/gm;
dongsheng@623 1565 $result .= "\n";
dongsheng@623 1566 return $result;
dongsheng@623 1567 }
dongsheng@623 1568
dongsheng@623 1569
dongsheng@623 1570 1;
dongsheng@623 1571 __END__
dongsheng@623 1572
dongsheng@623 1573 =back
dongsheng@623 1574
dongsheng@623 1575 =head1 AUTHORS
dongsheng@623 1576
dongsheng@623 1577 Denis Barbier <barbier@linuxfr.org>
dongsheng@623 1578 Martin Quinson (mquinson#debian.org)
dongsheng@623 1579
dongsheng@623 1580 =cut