dongsheng@623: # Locale::Po4a::Po -- manipulation of po files dongsheng@623: # $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $ dongsheng@623: # dongsheng@623: # This program is free software; you may redistribute it and/or modify it dongsheng@623: # under the terms of GPL (see COPYING). dongsheng@623: dongsheng@623: ############################################################################ dongsheng@623: # Modules and declarations dongsheng@623: ############################################################################ dongsheng@623: dongsheng@623: =head1 NAME dongsheng@623: dongsheng@623: Locale::Po4a::Po - po file manipulation module dongsheng@623: dongsheng@623: =head1 SYNOPSIS dongsheng@623: dongsheng@623: use Locale::Po4a::Po; dongsheng@623: my $pofile=Locale::Po4a::Po->new(); dongsheng@623: dongsheng@623: # Read po file dongsheng@623: $pofile->read('file.po'); dongsheng@623: dongsheng@623: # Add an entry dongsheng@623: $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour', dongsheng@623: 'flags' => "wrap", 'reference'=>'file.c:46'); dongsheng@623: dongsheng@623: # Extract a translation dongsheng@623: $pofile->gettext("Hello"); # returns 'bonjour' dongsheng@623: dongsheng@623: # Write back to a file dongsheng@623: $pofile->write('otherfile.po'); dongsheng@623: dongsheng@623: =head1 DESCRIPTION dongsheng@623: dongsheng@623: Locale::Po4a::Po is a module that allows you to manipulate message dongsheng@623: catalogs. You can load and write from/to a file (which extension is often dongsheng@623: I), you can build new entries on the fly or request for the translation dongsheng@623: of a string. dongsheng@623: dongsheng@623: For a more complete description of message catalogs in the po format and dongsheng@623: their use, please refer to the documentation of the gettext program. dongsheng@623: dongsheng@623: This module is part of the PO4A project, which objective is to use po files dongsheng@623: (designed at origin to ease the translation of program messages) to dongsheng@623: translate everything, including documentation (man page, info manual), dongsheng@623: package description, debconf templates, and everything which may benefit dongsheng@623: from this. dongsheng@623: dongsheng@623: =head1 OPTIONS ACCEPTED BY THIS MODULE dongsheng@623: dongsheng@623: =over 4 dongsheng@623: dongsheng@623: =item porefs dongsheng@623: dongsheng@623: This specifies the reference format. It can be one of 'none' to not produce dongsheng@623: any reference, 'noline' to not specify the line number, and 'full' to dongsheng@623: include complete references. dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: use IO::File; dongsheng@623: dongsheng@623: dongsheng@623: require Exporter; dongsheng@623: dongsheng@623: package Locale::Po4a::Po; dongsheng@623: use DynaLoader; dongsheng@623: dongsheng@623: use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext); dongsheng@623: dongsheng@623: use subs qw(makespace); dongsheng@623: use vars qw(@ISA @EXPORT_OK); dongsheng@623: @ISA = qw(Exporter DynaLoader); dongsheng@623: @EXPORT = qw(%debug); dongsheng@623: @EXPORT_OK = qw(&move_po_if_needed); dongsheng@623: dongsheng@623: use Locale::Po4a::TransTractor; dongsheng@623: # Try to use a C extension if present. dongsheng@623: eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION"); dongsheng@623: dongsheng@623: use 5.006; dongsheng@623: use strict; dongsheng@623: use warnings; dongsheng@623: dongsheng@623: use Carp qw(croak); dongsheng@623: use File::Path; # mkdir before write dongsheng@623: use File::Copy; # move dongsheng@623: use POSIX qw(strftime floor); dongsheng@623: use Time::Local; dongsheng@623: dongsheng@623: use Encode; dongsheng@623: dongsheng@623: my @known_flags=qw(wrap no-wrap c-format fuzzy); dongsheng@623: dongsheng@623: our %debug=('canonize' => 0, dongsheng@623: 'quote' => 0, dongsheng@623: 'escape' => 0, dongsheng@623: 'encoding' => 0, dongsheng@623: 'filter' => 0); dongsheng@623: dongsheng@623: =head1 Functions about whole message catalogs dongsheng@623: dongsheng@623: =over 4 dongsheng@623: dongsheng@623: =item new() dongsheng@623: dongsheng@623: Creates a new message catalog. If an argument is provided, it's the name of dongsheng@623: a po file we should load. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub new { dongsheng@623: my ($this, $options) = (shift, shift); dongsheng@623: my $class = ref($this) || $this; dongsheng@623: my $self = {}; dongsheng@623: bless $self, $class; dongsheng@623: $self->initialize($options); dongsheng@623: dongsheng@623: my $filename = shift; dongsheng@623: $self->read($filename) if defined($filename) && length($filename); dongsheng@623: return $self; dongsheng@623: } dongsheng@623: dongsheng@623: # Return the numerical timezone (e.g. +0200) dongsheng@623: # Neither the %z nor the %s formats of strftime are portable: dongsheng@623: # '%s' is not supported on Solaris and '%z' indicates dongsheng@623: # "2006-10-25 19:36E. Europe Standard Time" on MS Windows. dongsheng@623: sub timezone { dongsheng@623: my @g = gmtime(); dongsheng@623: my @l = localtime(); dongsheng@623: dongsheng@623: my $diff; dongsheng@623: $diff = floor(timelocal(@l)/60 +0.5); dongsheng@623: $diff -= floor(timelocal(@g)/60 +0.5); dongsheng@623: dongsheng@623: my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently dongsheng@623: # in a daylight saving time zone dongsheng@623: my $m = $diff%60; dongsheng@623: dongsheng@623: return sprintf "%+03d%02d\n", $h, $m; dongsheng@623: } dongsheng@623: dongsheng@623: sub initialize { dongsheng@623: my ($self, $options) = (shift, shift); dongsheng@623: my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone(); dongsheng@623: chomp $date; dongsheng@623: # $options = ref($options) || $options; dongsheng@623: dongsheng@623: $self->{options}{'porefs'}= 'full'; dongsheng@623: $self->{options}{'msgid-bugs-address'}= undef; dongsheng@623: $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc."; dongsheng@623: $self->{options}{'package-name'}= "PACKAGE"; dongsheng@623: $self->{options}{'package-version'}= "VERSION"; dongsheng@623: foreach my $opt (keys %$options) { dongsheng@623: if ($options->{$opt}) { dongsheng@623: die wrap_mod("po4a::po", dongsheng@623: dgettext ("po4a", "Unknown option: %s"), $opt) dongsheng@623: unless exists $self->{options}{$opt}; dongsheng@623: $self->{options}{$opt} = $options->{$opt}; dongsheng@623: } dongsheng@623: } dongsheng@623: $self->{options}{'porefs'} =~ /^(full|noline|none)$/ || dongsheng@623: die wrap_mod("po4a::po", dongsheng@623: dgettext ("po4a", dongsheng@623: "Invalid value for option 'porefs' ('%s' is ". dongsheng@623: "not one of 'full', 'noline' or 'none')"), dongsheng@623: $self->{options}{'porefs'}); dongsheng@623: dongsheng@623: $self->{po}=(); dongsheng@623: $self->{count}=0; # number of msgids in the PO dongsheng@623: # count_doc: number of strings in the document dongsheng@623: # (duplicate strings counted multiple times) dongsheng@623: $self->{count_doc}=0; dongsheng@623: $self->{header_comment}= dongsheng@623: " SOME DESCRIPTIVE TITLE\n" dongsheng@623: ." Copyright (C) YEAR ". dongsheng@623: $self->{options}{'copyright-holder'}."\n" dongsheng@623: ." This file is distributed under the same license ". dongsheng@623: "as the ".$self->{options}{'package-name'}." package.\n" dongsheng@623: ." FIRST AUTHOR , YEAR.\n" dongsheng@623: ."\n" dongsheng@623: .", fuzzy"; dongsheng@623: # $self->header_tag="fuzzy"; dongsheng@623: $self->{header}=escape_text("Project-Id-Version: ". dongsheng@623: $self->{options}{'package-name'}." ". dongsheng@623: $self->{options}{'package-version'}."\n". dongsheng@623: ((defined $self->{options}{'msgid-bugs-address'})? dongsheng@623: "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n": dongsheng@623: ""). dongsheng@623: "POT-Creation-Date: $date\n". dongsheng@623: "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n". dongsheng@623: "Last-Translator: FULL NAME \n". dongsheng@623: "Language-Team: LANGUAGE \n". dongsheng@623: "MIME-Version: 1.0\n". dongsheng@623: "Content-Type: text/plain; charset=CHARSET\n". dongsheng@623: "Content-Transfer-Encoding: ENCODING"); dongsheng@623: dongsheng@623: $self->{encoder}=find_encoding("ascii"); dongsheng@623: dongsheng@623: # To make stats about gettext hits dongsheng@623: $self->stats_clear(); dongsheng@623: } dongsheng@623: dongsheng@623: =item read($) dongsheng@623: dongsheng@623: Reads a po file (which name is given as argument). Previously existing dongsheng@623: entries in self are not removed, the new ones are added to the end of the dongsheng@623: catalog. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub read { dongsheng@623: my $self=shift; dongsheng@623: my $filename=shift dongsheng@623: or croak wrap_mod("po4a::po", dongsheng@623: dgettext("po4a", dongsheng@623: "Please provide a non-null filename")); dongsheng@623: dongsheng@623: my $fh; dongsheng@623: if ($filename eq '-') { dongsheng@623: $fh=*STDIN; dongsheng@623: } else { dongsheng@623: open $fh,"<$filename" dongsheng@623: or croak wrap_mod("po4a::po", dongsheng@623: dgettext("po4a", "Can't read from %s: %s"), dongsheng@623: $filename, $!); dongsheng@623: } dongsheng@623: dongsheng@623: ## Read paragraphs line-by-line dongsheng@623: my $pofile=""; dongsheng@623: my $textline; dongsheng@623: while (defined ($textline = <$fh>)) { dongsheng@623: $pofile .= $textline; dongsheng@623: } dongsheng@623: # close INPUT dongsheng@623: # or croak (sprintf(dgettext("po4a", dongsheng@623: # "Can't close %s after reading: %s"), dongsheng@623: # $filename,$!)."\n"); dongsheng@623: dongsheng@623: my $linenum=0; dongsheng@623: dongsheng@623: foreach my $msg (split (/\n\n/,$pofile)) { dongsheng@623: my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer); dongsheng@623: my ($msgid_plural, $msgstr_plural); dongsheng@623: foreach my $line (split (/\n/,$msg)) { dongsheng@623: $linenum++; dongsheng@623: if ($line =~ /^#\. ?(.*)$/) { # Automatic comment dongsheng@623: $automatic .= (defined($automatic) ? "\n" : "").$1; dongsheng@623: dongsheng@623: } elsif ($line =~ /^#: ?(.*)$/) { # reference dongsheng@623: $reference .= (defined($reference) ? "\n" : "").$1; dongsheng@623: dongsheng@623: } elsif ($line =~ /^#, ?(.*)$/) { # flags dongsheng@623: $flags .= (defined($flags) ? "\n" : "").$1; dongsheng@623: dongsheng@623: } elsif ($line =~ /^#(.*)$/) { # Translator comments dongsheng@623: $comment .= (defined($comment) ? "\n" : "").($1||""); dongsheng@623: dongsheng@623: } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid dongsheng@623: $buffer = $1; dongsheng@623: dongsheng@623: } elsif ($line =~ /^msgid_plural (".*")$/) { dongsheng@623: # begin of msgid_plural, end of msgid dongsheng@623: dongsheng@623: $msgid = $buffer; dongsheng@623: $buffer = $1; dongsheng@623: dongsheng@623: } elsif ($line =~ /^msgstr (".*")$/) { dongsheng@623: # begin of msgstr, end of msgid dongsheng@623: dongsheng@623: $msgid = $buffer; dongsheng@623: $buffer = "$1"; dongsheng@623: dongsheng@623: } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) { dongsheng@623: # begin of msgstr[x], end of msgid_plural or msgstr[x-1] dongsheng@623: dongsheng@623: # Note: po4a cannot uses plural forms dongsheng@623: # (no integer to use the plural form) dongsheng@623: # * drop the msgstr[x] where x >= 2 dongsheng@623: # * use msgstr[0] as the translation of msgid dongsheng@623: # * use msgstr[1] as the translation of msgid_plural dongsheng@623: dongsheng@623: if ($1 eq "0") { dongsheng@623: $msgid_plural = $buffer; dongsheng@623: $buffer = "$2"; dongsheng@623: } elsif ($1 eq "1") { dongsheng@623: $msgstr = $buffer; dongsheng@623: $buffer = "$2"; dongsheng@623: } elsif ($1 eq "2") { dongsheng@623: $msgstr_plural = $buffer; dongsheng@623: warn wrap_ref_mod("$filename:$linenum", dongsheng@623: "po4a::po", dongsheng@623: dgettext("po4a", "Messages with more than 2 plural forms are not supported.")); dongsheng@623: } dongsheng@623: } elsif ($line =~ /^(".*")$/) { dongsheng@623: # continuation of a line dongsheng@623: $buffer .= "\n$1"; dongsheng@623: dongsheng@623: } else { dongsheng@623: warn wrap_ref_mod("$filename:$linenum", dongsheng@623: "po4a::po", dongsheng@623: dgettext("po4a", "Strange line: -->%s<--"), dongsheng@623: $line); dongsheng@623: } dongsheng@623: } dongsheng@623: $linenum++; dongsheng@623: if (defined $msgid_plural) { dongsheng@623: $msgstr_plural=$buffer; dongsheng@623: dongsheng@623: $msgid = unquote_text($msgid) if (defined($msgid)); dongsheng@623: $msgstr = unquote_text($msgstr) if (defined($msgstr)); dongsheng@623: dongsheng@623: $self->push_raw ('msgid' => $msgid, dongsheng@623: 'msgstr' => $msgstr, dongsheng@623: 'reference' => $reference, dongsheng@623: 'flags' => $flags, dongsheng@623: 'comment' => $comment, dongsheng@623: 'automatic' => $automatic, dongsheng@623: 'plural' => 0); dongsheng@623: dongsheng@623: $msgid_plural = unquote_text($msgid_plural) dongsheng@623: if (defined($msgid_plural)); dongsheng@623: $msgstr_plural = unquote_text($msgstr_plural) dongsheng@623: if (defined($msgstr_plural)); dongsheng@623: dongsheng@623: $self->push_raw ('msgid' => $msgid_plural, dongsheng@623: 'msgstr' => $msgstr_plural, dongsheng@623: 'reference' => $reference, dongsheng@623: 'flags' => $flags, dongsheng@623: 'comment' => $comment, dongsheng@623: 'automatic' => $automatic, dongsheng@623: 'plural' => 1); dongsheng@623: } else { dongsheng@623: $msgstr=$buffer; dongsheng@623: dongsheng@623: $msgid = unquote_text($msgid) if (defined($msgid)); dongsheng@623: $msgstr = unquote_text($msgstr) if (defined($msgstr)); dongsheng@623: dongsheng@623: $self->push_raw ('msgid' => $msgid, dongsheng@623: 'msgstr' => $msgstr, dongsheng@623: 'reference' => $reference, dongsheng@623: 'flags' => $flags, dongsheng@623: 'comment' => $comment, dongsheng@623: 'automatic' => $automatic); dongsheng@623: } dongsheng@623: } dongsheng@623: } dongsheng@623: dongsheng@623: =item write($) dongsheng@623: dongsheng@623: Writes the current catalog to the given file. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub write{ dongsheng@623: my $self=shift; dongsheng@623: my $filename=shift dongsheng@623: or croak dgettext("po4a","Can't write to a file without filename")."\n"; dongsheng@623: dongsheng@623: my $fh; dongsheng@623: if ($filename eq '-') { dongsheng@623: $fh=\*STDOUT; dongsheng@623: } else { dongsheng@623: # make sure the directory in which we should write the localized dongsheng@623: # file exists dongsheng@623: my $dir = $filename; dongsheng@623: if ($dir =~ m|/|) { dongsheng@623: $dir =~ s|/[^/]*$||; dongsheng@623: dongsheng@623: File::Path::mkpath($dir, 0, 0755) # Croaks on error dongsheng@623: if (length ($dir) && ! -e $dir); dongsheng@623: } dongsheng@623: open $fh,">$filename" dongsheng@623: or croak wrap_mod("po4a::po", dongsheng@623: dgettext("po4a", "Can't write to %s: %s"), dongsheng@623: $filename, $!); dongsheng@623: } dongsheng@623: dongsheng@623: print $fh "".format_comment($self->{header_comment},"") dongsheng@623: if defined($self->{header_comment}) && length($self->{header_comment}); dongsheng@623: dongsheng@623: print $fh "msgid \"\"\n"; dongsheng@623: print $fh "msgstr ".quote_text($self->{header})."\n\n"; dongsheng@623: dongsheng@623: dongsheng@623: my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms dongsheng@623: my $first=1; dongsheng@623: foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=> dongsheng@623: ($self->{po}{"$b"}{'pos'}) dongsheng@623: } keys %{$self->{po}}) { dongsheng@623: my $output=""; dongsheng@623: dongsheng@623: if ($first) { dongsheng@623: $first=0; dongsheng@623: } else { dongsheng@623: $output .= "\n"; dongsheng@623: } dongsheng@623: dongsheng@623: $output .= format_comment($self->{po}{$msgid}{'comment'},"") dongsheng@623: if defined($self->{po}{$msgid}{'comment'}) dongsheng@623: && length ($self->{po}{$msgid}{'comment'}); dongsheng@623: if ( defined($self->{po}{$msgid}{'automatic'}) dongsheng@623: && length ($self->{po}{$msgid}{'automatic'})) { dongsheng@623: foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'})) dongsheng@623: { dongsheng@623: $output .= format_comment($comment, ". ") dongsheng@623: } dongsheng@623: } dongsheng@623: $output .= format_comment($self->{po}{$msgid}{'type'},". type: ") dongsheng@623: if defined($self->{po}{$msgid}{'type'}) dongsheng@623: && length ($self->{po}{$msgid}{'type'}); dongsheng@623: $output .= format_comment($self->{po}{$msgid}{'reference'},": ") dongsheng@623: if defined($self->{po}{$msgid}{'reference'}) dongsheng@623: && length ($self->{po}{$msgid}{'reference'}); dongsheng@623: $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n" dongsheng@623: if defined($self->{po}{$msgid}{'flags'}) dongsheng@623: && length ($self->{po}{$msgid}{'flags'}); dongsheng@623: dongsheng@623: if (exists $self->{po}{$msgid}{'plural'}) { dongsheng@623: if ($self->{po}{$msgid}{'plural'} == 0) { dongsheng@623: if ($self->get_charset =~ /^utf-8$/i) { dongsheng@623: my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); dongsheng@623: $msgid = Encode::decode_utf8($msgid); dongsheng@623: $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n"); dongsheng@623: $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n"); dongsheng@623: } else { dongsheng@623: $output = "msgid ".quote_text($msgid)."\n"; dongsheng@623: $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; dongsheng@623: } dongsheng@623: } elsif ($self->{po}{$msgid}{'plural'} == 1) { dongsheng@623: # TODO: there may be only one plural form dongsheng@623: if ($self->get_charset =~ /^utf-8$/i) { dongsheng@623: my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); dongsheng@623: $msgid = Encode::decode_utf8($msgid); dongsheng@623: $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n"); dongsheng@623: $output .= $buf_msgstr_plural; dongsheng@623: $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n"); dongsheng@623: $buf_msgstr_plural = ""; dongsheng@623: } else { dongsheng@623: $output = "msgid_plural ".quote_text($msgid)."\n"; dongsheng@623: $output .= $buf_msgstr_plural; dongsheng@623: $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; dongsheng@623: } dongsheng@623: } else { dongsheng@623: die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms.")); dongsheng@623: } dongsheng@623: } else { dongsheng@623: if ($self->get_charset =~ /^utf-8$/i) { dongsheng@623: my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); dongsheng@623: $msgid = Encode::decode_utf8($msgid); dongsheng@623: $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n"); dongsheng@623: $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n"); dongsheng@623: } else { dongsheng@623: $output .= "msgid ".quote_text($msgid)."\n"; dongsheng@623: $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; dongsheng@623: } dongsheng@623: } dongsheng@623: dongsheng@623: print $fh $output; dongsheng@623: } dongsheng@623: # print STDERR "$fh"; dongsheng@623: # if ($filename ne '-') { dongsheng@623: # close $fh dongsheng@623: # or croak (sprintf(dgettext("po4a", dongsheng@623: # "Can't close %s after writing: %s\n"), dongsheng@623: # $filename,$!)); dongsheng@623: # } dongsheng@623: } dongsheng@623: dongsheng@623: =item write_if_needed($$) dongsheng@623: dongsheng@623: Like write, but if the PO or POT file already exists, the object will be dongsheng@623: written in a temporary file which will be compared with the existing file dongsheng@623: to check that the update is needed (this avoids to change a POT just to dongsheng@623: update a line reference or the POT-Creation-Date field). dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub move_po_if_needed { dongsheng@623: my ($new_po, $old_po, $backup) = (shift, shift, shift); dongsheng@623: my $diff; dongsheng@623: dongsheng@623: if (-e $old_po) { dongsheng@623: my $diff_ignore = "-I'^#:' " dongsheng@623: ."-I'^\"POT-Creation-Date:' " dongsheng@623: ."-I'^\"PO-Revision-Date:'"; dongsheng@623: $diff = qx(diff -q $diff_ignore $old_po $new_po); dongsheng@623: if ( $diff eq "" ) { dongsheng@623: unlink $new_po dongsheng@623: or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."), dongsheng@623: $new_po, $!); dongsheng@623: # touch the old PO dongsheng@623: my ($atime, $mtime) = (time,time); dongsheng@623: utime $atime, $mtime, $old_po; dongsheng@623: } else { dongsheng@623: if ($backup) { dongsheng@623: copy $old_po, $old_po."~" dongsheng@623: or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."), dongsheng@623: $old_po, $old_po."~", $!); dongsheng@623: } else { dongsheng@623: } dongsheng@623: move $new_po, $old_po dongsheng@623: or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."), dongsheng@623: $new_po, $old_po, $!); dongsheng@623: } dongsheng@623: } else { dongsheng@623: move $new_po, $old_po dongsheng@623: or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."), dongsheng@623: $new_po, $old_po, $!); dongsheng@623: } dongsheng@623: } dongsheng@623: dongsheng@623: sub write_if_needed { dongsheng@623: my $self=shift; dongsheng@623: my $filename=shift dongsheng@623: or croak dgettext("po4a","Can't write to a file without filename")."\n"; dongsheng@623: dongsheng@623: if (-e $filename) { dongsheng@623: my ($tmp_filename); dongsheng@623: (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX", dongsheng@623: DIR => "/tmp", dongsheng@623: OPEN => 0, dongsheng@623: UNLINK => 0); dongsheng@623: $self->write($tmp_filename); dongsheng@623: move_po_if_needed($tmp_filename, $filename); dongsheng@623: } else { dongsheng@623: $self->write($filename); dongsheng@623: } dongsheng@623: } dongsheng@623: dongsheng@623: =item gettextize($$) dongsheng@623: dongsheng@623: This function produces one translated message catalog from two catalogs, an dongsheng@623: original and a translation. This process is described in L, dongsheng@623: section I. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub gettextize { dongsheng@623: my $this = shift; dongsheng@623: my $class = ref($this) || $this; dongsheng@623: my ($poorig,$potrans)=(shift,shift); dongsheng@623: dongsheng@623: my $pores=Locale::Po4a::Po->new(); dongsheng@623: dongsheng@623: my $please_fail = 0; dongsheng@623: my $toobad = dgettext("po4a", dongsheng@623: "\nThe gettextization failed (once again). Don't give up, ". dongsheng@623: "gettextizing is a subtle art, but this is only needed once ". dongsheng@623: "to convert a project to the gorgeous luxus offered by po4a ". dongsheng@623: "to translators.". dongsheng@623: "\nPlease refer to the po4a(7) documentation, the section ". dongsheng@623: "\"HOWTO convert a pre-existing translation to po4a?\" ". dongsheng@623: "contains several hints to help you in your task"); dongsheng@623: dongsheng@623: # Don't fail right now when the entry count does not match. Instead, give dongsheng@623: # it a try so that the user can see where we fail (which is probably where dongsheng@623: # the problem is). dongsheng@623: if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) { dongsheng@623: warn wrap_mod("po4a gettextize", dgettext("po4a", dongsheng@623: "Original has more strings than the translation (%d>%d). ". dongsheng@623: "Please fix it by editing the translated version to add ". dongsheng@623: "some dummy entry."), dongsheng@623: $poorig->count_entries_doc(), dongsheng@623: $potrans->count_entries_doc()); dongsheng@623: $please_fail = 1; dongsheng@623: } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) { dongsheng@623: warn wrap_mod("po4a gettextize", dgettext("po4a", dongsheng@623: "Original has less strings than the translation (%d<%d). ". dongsheng@623: "Please fix it by removing the extra entry from the ". dongsheng@623: "translated file. You may need an addendum (cf po4a(7)) ". dongsheng@623: "to reput the chunk in place after gettextization. A ". dongsheng@623: "possible cause is that a text duplicated in the original ". dongsheng@623: "is not translated the same way each time. Remove one of ". dongsheng@623: "the translations, and you're fine."), dongsheng@623: $poorig->count_entries_doc(), dongsheng@623: $potrans->count_entries_doc()); dongsheng@623: $please_fail = 1; dongsheng@623: } dongsheng@623: dongsheng@623: if ( $poorig->get_charset =~ /^utf-8$/i ) { dongsheng@623: $potrans->to_utf8; dongsheng@623: $pores->set_charset("utf-8"); dongsheng@623: } else { dongsheng@623: if ($potrans->get_charset eq "CHARSET") { dongsheng@623: $pores->set_charset("ascii"); dongsheng@623: } else { dongsheng@623: $pores->set_charset($potrans->get_charset); dongsheng@623: } dongsheng@623: } dongsheng@623: print "Po character sets:\n". dongsheng@623: " original=".$poorig->get_charset."\n". dongsheng@623: " translated=".$potrans->get_charset."\n". dongsheng@623: " result=".$pores->get_charset."\n" dongsheng@623: if $debug{'encoding'}; dongsheng@623: dongsheng@623: for (my ($o,$t)=(0,0) ; dongsheng@623: $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc(); dongsheng@623: $o++,$t++) { dongsheng@623: # dongsheng@623: # Extract some informations dongsheng@623: dongsheng@623: my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t)); dongsheng@623: # print STDERR "Matches [[$orig]]<<$trans>>\n"; dongsheng@623: dongsheng@623: my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'}, dongsheng@623: $potrans->{po}{$trans}{'reference'}); dongsheng@623: my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'}, dongsheng@623: $potrans->{po}{$trans}{'type'}); dongsheng@623: dongsheng@623: # dongsheng@623: # Make sure the type of both string exist dongsheng@623: # dongsheng@623: die wrap_mod("po4a gettextize", dongsheng@623: "Internal error: type of original string number %s ". dongsheng@623: "isn't provided", $o) dongsheng@623: if ($typeorig eq ''); dongsheng@623: dongsheng@623: die wrap_mod("po4a gettextize", dongsheng@623: "Internal error: type of translated string number %s ". dongsheng@623: "isn't provided", $o) dongsheng@623: if ($typetrans eq ''); dongsheng@623: dongsheng@623: # dongsheng@623: # Make sure both type are the same dongsheng@623: # dongsheng@623: if ($typeorig ne $typetrans){ dongsheng@623: $pores->write("gettextization.failed.po"); dongsheng@623: die wrap_msg(dgettext("po4a", dongsheng@623: "po4a gettextization: Structure disparity between ". dongsheng@623: "original and translated files:\n". dongsheng@623: "msgid (at %s) is of type '%s' while\n". dongsheng@623: "msgstr (at %s) is of type '%s'.\n". dongsheng@623: "Original text: %s\n". dongsheng@623: "Translated text: %s\n". dongsheng@623: "(result so far dumped to gettextization.failed.po)"). dongsheng@623: "%s", dongsheng@623: $reforig, $typeorig, dongsheng@623: $reftrans, $typetrans, dongsheng@623: $orig, dongsheng@623: $trans, dongsheng@623: $toobad); dongsheng@623: } dongsheng@623: dongsheng@623: # dongsheng@623: # Push the entry dongsheng@623: # dongsheng@623: my $flags; dongsheng@623: if (defined $poorig->{po}{$orig}{'flags'}) { dongsheng@623: $flags = $poorig->{po}{$orig}{'flags'}." fuzzy"; dongsheng@623: } else { dongsheng@623: $flags = "fuzzy"; dongsheng@623: } dongsheng@623: $pores->push_raw('msgid' => $orig, dongsheng@623: 'msgstr' => $trans, dongsheng@623: 'flags' => $flags, dongsheng@623: 'type' => $typeorig, dongsheng@623: 'reference' => $reforig, dongsheng@623: 'conflict' => 1, dongsheng@623: 'transref' => $potrans->{po}{$trans}{'reference'}) dongsheng@623: unless (defined($pores->{po}{$orig}) dongsheng@623: and ($pores->{po}{$orig}{'msgstr'} eq $trans)) dongsheng@623: # FIXME: maybe we should be smarter about what reference should be dongsheng@623: # sent to push_raw. dongsheng@623: } dongsheng@623: dongsheng@623: # make sure we return a useful error message when entry count differ dongsheng@623: die "$toobad\n" if $please_fail; dongsheng@623: dongsheng@623: return $pores; dongsheng@623: } dongsheng@623: dongsheng@623: =item filter($) dongsheng@623: dongsheng@623: This function extracts a catalog from an existing one. Only the entries having dongsheng@623: a reference in the given file will be placed in the resulting catalog. dongsheng@623: dongsheng@623: This function parses its argument, converts it to a perl function definition, dongsheng@623: eval this definition and filter the fields for which this function returns dongsheng@623: true. dongsheng@623: dongsheng@623: I love perl sometimes ;) dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub filter { dongsheng@623: my $self=shift; dongsheng@623: our $filter=shift; dongsheng@623: dongsheng@623: my $res; dongsheng@623: $res = Locale::Po4a::Po->new(); dongsheng@623: dongsheng@623: # Parse the filter dongsheng@623: our $code="sub apply { return "; dongsheng@623: our $pos=0; dongsheng@623: our $length = length $filter; dongsheng@623: dongsheng@623: # explode chars to parts. How to subscript a string in Perl? dongsheng@623: our @filter = split(//,$filter); dongsheng@623: dongsheng@623: sub gloups { dongsheng@623: my $fmt=shift; dongsheng@623: my $space = ""; dongsheng@623: for (1..$pos){ dongsheng@623: $space .= ' '; dongsheng@623: } dongsheng@623: die wrap_msg("$fmt\n$filter\n$space^ HERE"); dongsheng@623: } dongsheng@623: sub showmethecode { dongsheng@623: return unless $debug{'filter'}; dongsheng@623: my $fmt=shift; dongsheng@623: my $space=""; dongsheng@623: for (1..$pos){ dongsheng@623: $space .= ' '; dongsheng@623: } dongsheng@623: print STDERR "$filter\n$space^ $fmt\n";#"$code\n"; dongsheng@623: } dongsheng@623: dongsheng@623: # I dream of a lex in perl :-/ dongsheng@623: sub parse_expression { dongsheng@623: showmethecode("Begin expression") dongsheng@623: if $debug{'filter'}; dongsheng@623: dongsheng@623: gloups("Begin of expression expected, got '%s'",$filter[$pos]) dongsheng@623: unless ($filter[$pos] eq '('); dongsheng@623: $pos ++; # pass the '(' dongsheng@623: if ($filter[$pos] eq '&') { dongsheng@623: # AND dongsheng@623: $pos++; dongsheng@623: showmethecode("Begin of AND") dongsheng@623: if $debug{'filter'}; dongsheng@623: $code .= "("; dongsheng@623: while (1) { dongsheng@623: gloups ("Unfinished AND statement.") dongsheng@623: if ($pos == $length); dongsheng@623: parse_expression(); dongsheng@623: if ($filter[$pos] eq '(') { dongsheng@623: $code .= " && "; dongsheng@623: } elsif ($filter[$pos] eq ')') { dongsheng@623: last; # do not eat that char dongsheng@623: } else { dongsheng@623: gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]); dongsheng@623: } dongsheng@623: } dongsheng@623: $code .= ")"; dongsheng@623: } elsif ($filter[$pos] eq '|') { dongsheng@623: # OR dongsheng@623: $pos++; dongsheng@623: $code .= "("; dongsheng@623: while (1) { dongsheng@623: gloups("Unfinished OR statement.") dongsheng@623: if ($pos == $length); dongsheng@623: parse_expression(); dongsheng@623: if ($filter[$pos] eq '(') { dongsheng@623: $code .= " || "; dongsheng@623: } elsif ($filter[$pos] eq ')') { dongsheng@623: last; # do not eat that char dongsheng@623: } else { dongsheng@623: gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]); dongsheng@623: } dongsheng@623: } dongsheng@623: $code .= ")"; dongsheng@623: } elsif ($filter[$pos] eq '!') { dongsheng@623: # NOT dongsheng@623: $pos++; dongsheng@623: $code .= "(!"; dongsheng@623: gloups("Missing sub-expression in NOT statement.") dongsheng@623: if ($pos == $length); dongsheng@623: parse_expression(); dongsheng@623: $code .= ")"; dongsheng@623: } else { dongsheng@623: # must be an equal. Let's get field and argument dongsheng@623: my ($field,$arg,$done); dongsheng@623: $field = substr($filter,$pos); dongsheng@623: gloups("EQ statement contains no '=' or invalid field name") dongsheng@623: unless ($field =~ /([a-z]*)=/i); dongsheng@623: $field = lc($1); dongsheng@623: $pos += (length $field) + 1; dongsheng@623: dongsheng@623: # check that we've got a valid field name, dongsheng@623: # and the number it referes to dongsheng@623: # DO NOT CHANGE THE ORDER dongsheng@623: my @names=qw(msgid msgstr reference flags comment automatic); dongsheng@623: my $fieldpos; dongsheng@623: for ($fieldpos = 0; dongsheng@623: $fieldpos < scalar @names && $field ne $names[$fieldpos]; dongsheng@623: $fieldpos++) {} dongsheng@623: gloups("Invalid field name: %s",$field) dongsheng@623: if $fieldpos == scalar @names; # not found dongsheng@623: dongsheng@623: # Now, get the argument value. It has to be between quotes, dongsheng@623: # which can be escaped dongsheng@623: # We point right on the first char of the argument dongsheng@623: # (first quote already eaten) dongsheng@623: my $escaped = 0; dongsheng@623: my $quoted = 0; dongsheng@623: if ($filter[$pos] eq '"') { dongsheng@623: $pos++; dongsheng@623: $quoted = 1; dongsheng@623: } dongsheng@623: showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'") dongsheng@623: if $debug{'filter'}; dongsheng@623: dongsheng@623: while (!$done) { dongsheng@623: gloups("Unfinished EQ argument.") dongsheng@623: if ($pos == $length); dongsheng@623: dongsheng@623: if ($quoted) { dongsheng@623: if ($filter[$pos] eq '\\') { dongsheng@623: if ($escaped) { dongsheng@623: $arg .= '\\'; dongsheng@623: $escaped = 0; dongsheng@623: } else { dongsheng@623: $escaped = 1; dongsheng@623: } dongsheng@623: } elsif ($escaped) { dongsheng@623: if ($filter[$pos] eq '"') { dongsheng@623: $arg .= '"'; dongsheng@623: $escaped = 0; dongsheng@623: } else { dongsheng@623: gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]); dongsheng@623: } dongsheng@623: } else { dongsheng@623: if ($filter[$pos] eq '"') { dongsheng@623: $done = 1; dongsheng@623: } else { dongsheng@623: $arg .= $filter[$pos]; dongsheng@623: } dongsheng@623: } dongsheng@623: } else { dongsheng@623: if ($filter[$pos] eq ')') { dongsheng@623: # counter the next ++ since we don't want to eat dongsheng@623: # this char dongsheng@623: $pos--; dongsheng@623: $done = 1; dongsheng@623: } else { dongsheng@623: $arg .= $filter[$pos]; dongsheng@623: } dongsheng@623: } dongsheng@623: $pos++; dongsheng@623: } dongsheng@623: # and now, add the code to check this equality dongsheng@623: $code .= "(\$_[$fieldpos] =~ m/$arg/)"; dongsheng@623: dongsheng@623: } dongsheng@623: showmethecode("End of expression") dongsheng@623: if $debug{'filter'}; dongsheng@623: gloups("Unfinished statement.") dongsheng@623: if ($pos == $length); dongsheng@623: gloups("End of expression expected, got '%s'",$filter[$pos]) dongsheng@623: unless ($filter[$pos] eq ')'); dongsheng@623: $pos++; dongsheng@623: } dongsheng@623: # And now, launch the beast, finish the function and use eval dongsheng@623: # to construct this function. dongsheng@623: # Ok, the lack of lexer is a fair price for the eval ;) dongsheng@623: parse_expression(); dongsheng@623: gloups("Garbage at the end of the expression") dongsheng@623: if ($pos != $length); dongsheng@623: $code .= "; }"; dongsheng@623: print STDERR "CODE = $code\n" dongsheng@623: if $debug{'filter'}; dongsheng@623: eval $code; dongsheng@623: die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@) dongsheng@623: if $@; dongsheng@623: dongsheng@623: for (my $cpt=(0) ; dongsheng@623: $cpt<$self->count_entries(); dongsheng@623: $cpt++) { dongsheng@623: dongsheng@623: my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic); dongsheng@623: dongsheng@623: $msgid = $self->msgid($cpt); dongsheng@623: $ref=$self->{po}{$msgid}{'reference'}; dongsheng@623: dongsheng@623: $msgstr= $self->{po}{$msgid}{'msgstr'}; dongsheng@623: $flags = $self->{po}{$msgid}{'flags'}; dongsheng@623: $type = $self->{po}{$msgid}{'type'}; dongsheng@623: $comment = $self->{po}{$msgid}{'comment'}; dongsheng@623: $automatic = $self->{po}{$msgid}{'automatic'}; dongsheng@623: dongsheng@623: # DO NOT CHANGE THE ORDER dongsheng@623: $res->push_raw('msgid' => $msgid, dongsheng@623: 'msgstr' => $msgstr, dongsheng@623: 'flags' => $flags, dongsheng@623: 'type' => $type, dongsheng@623: 'reference' => $ref, dongsheng@623: 'comment' => $comment, dongsheng@623: 'automatic' => $automatic) dongsheng@623: if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic)); dongsheng@623: } dongsheng@623: # delete the apply subroutine dongsheng@623: # otherwise it will be redefined. dongsheng@623: undef &apply; dongsheng@623: return $res; dongsheng@623: } dongsheng@623: dongsheng@623: =item to_utf8() dongsheng@623: dongsheng@623: Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not dongsheng@623: specified in the po file ("CHARSET" value), or if it's already utf-8 or dongsheng@623: ascii. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub to_utf8 { dongsheng@623: my $this = shift; dongsheng@623: my $charset = $this->get_charset(); dongsheng@623: dongsheng@623: unless ($charset eq "CHARSET" or dongsheng@623: $charset =~ /^ascii$/i or dongsheng@623: $charset =~ /^utf-8$/i) { dongsheng@623: foreach my $msgid ( keys %{$this->{po}} ) { dongsheng@623: Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8"); dongsheng@623: } dongsheng@623: $this->set_charset("utf-8"); dongsheng@623: } dongsheng@623: } dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =head1 Functions to use a message catalog for translations dongsheng@623: dongsheng@623: =over 4 dongsheng@623: dongsheng@623: =item gettext($%) dongsheng@623: dongsheng@623: Request the translation of the string given as argument in the current catalog. dongsheng@623: The function returns the original (untranslated) string if the string was not dongsheng@623: found. dongsheng@623: dongsheng@623: After the string to translate, you can pass a hash of extra dongsheng@623: arguments. Here are the valid entries: dongsheng@623: dongsheng@623: =over dongsheng@623: dongsheng@623: =item wrap dongsheng@623: dongsheng@623: boolean indicating whether we can consider that whitespaces in string are dongsheng@623: not important. If yes, the function canonizes the string before looking for dongsheng@623: a translation, and wraps the result. dongsheng@623: dongsheng@623: =item wrapcol dongsheng@623: dongsheng@623: The column at which we should wrap (default: 76). dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub gettext { dongsheng@623: my $self=shift; dongsheng@623: my $text=shift; dongsheng@623: my (%opt)=@_; dongsheng@623: my $res; dongsheng@623: dongsheng@623: return "" unless defined($text) && length($text); # Avoid returning the header. dongsheng@623: my $validoption="reference wrap wrapcol"; dongsheng@623: my %validoption; dongsheng@623: dongsheng@623: map { $validoption{$_}=1 } (split(/ /,$validoption)); dongsheng@623: foreach (keys %opt) { dongsheng@623: Carp::confess "internal error: unknown arg $_.\n". dongsheng@623: "Here are the valid options: $validoption.\n" dongsheng@623: unless $validoption{$_}; dongsheng@623: } dongsheng@623: dongsheng@623: $text=canonize($text) dongsheng@623: if ($opt{'wrap'}); dongsheng@623: dongsheng@623: my $esc_text=escape_text($text); dongsheng@623: dongsheng@623: $self->{gettextqueries}++; dongsheng@623: dongsheng@623: if ( defined $self->{po}{$esc_text} dongsheng@623: and defined $self->{po}{$esc_text}{'msgstr'} dongsheng@623: and length $self->{po}{$esc_text}{'msgstr'} dongsheng@623: and ( not defined $self->{po}{$esc_text}{'flags'} dongsheng@623: or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) { dongsheng@623: dongsheng@623: $self->{gettexthits}++; dongsheng@623: $res = unescape_text($self->{po}{$esc_text}{'msgstr'}); dongsheng@623: if (defined $self->{po}{$esc_text}{'plural'}) { dongsheng@623: if ($self->{po}{$esc_text}{'plural'} eq "0") { dongsheng@623: warn wrap_mod("po4a gettextize", dgettext("po4a", dongsheng@623: "'%s' is the singular form of a message, ". dongsheng@623: "po4a will use the msgstr[0] translation (%s)."), dongsheng@623: $esc_text, $res); dongsheng@623: } else { dongsheng@623: warn wrap_mod("po4a gettextize", dgettext("po4a", dongsheng@623: "'%s' is the plural form of a message, ". dongsheng@623: "po4a will use the msgstr[1] translation (%s)."), dongsheng@623: $esc_text, $res); dongsheng@623: } dongsheng@623: } dongsheng@623: } else { dongsheng@623: $res = $text; dongsheng@623: } dongsheng@623: dongsheng@623: if ($opt{'wrap'}) { dongsheng@623: if ($self->get_charset =~ /^utf-8$/i) { dongsheng@623: $res=Encode::decode_utf8($res); dongsheng@623: $res=wrap ($res, $opt{'wrapcol'} || 76); dongsheng@623: $res=Encode::encode_utf8($res); dongsheng@623: } else { dongsheng@623: $res=wrap ($res, $opt{'wrapcol'} || 76); dongsheng@623: } dongsheng@623: } dongsheng@623: # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n"; dongsheng@623: return $res; dongsheng@623: } dongsheng@623: dongsheng@623: =item stats_get() dongsheng@623: dongsheng@623: Returns statistics about the hit ratio of gettext since the last time that dongsheng@623: stats_clear() was called. Please note that it's not the same dongsheng@623: statistics than the one printed by msgfmt --statistic. Here, it's statistics dongsheng@623: about recent usage of the po file, while msgfmt reports the status of the dongsheng@623: file. Example of use: dongsheng@623: dongsheng@623: [some use of the po file to translate stuff] dongsheng@623: dongsheng@623: ($percent,$hit,$queries) = $pofile->stats_get(); dongsheng@623: print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n"; dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub stats_get() { dongsheng@623: my $self=shift; dongsheng@623: my ($h,$q)=($self->{gettexthits},$self->{gettextqueries}); dongsheng@623: my $p = ($q == 0 ? 100 : int($h/$q*10000)/100); dongsheng@623: dongsheng@623: # $p =~ s/\.00//; dongsheng@623: # $p =~ s/(\..)0/$1/; dongsheng@623: dongsheng@623: return ( $p,$h,$q ); dongsheng@623: } dongsheng@623: dongsheng@623: =item stats_clear() dongsheng@623: dongsheng@623: Clears the statistics about gettext hits. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub stats_clear { dongsheng@623: my $self = shift; dongsheng@623: $self->{gettextqueries} = 0; dongsheng@623: $self->{gettexthits} = 0; dongsheng@623: } dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =head1 Functions to build a message catalog dongsheng@623: dongsheng@623: =over 4 dongsheng@623: dongsheng@623: =item push(%) dongsheng@623: dongsheng@623: Push a new entry at the end of the current catalog. The arguments should dongsheng@623: form a hash table. The valid keys are: dongsheng@623: dongsheng@623: =over 4 dongsheng@623: dongsheng@623: =item msgid dongsheng@623: dongsheng@623: the string in original language. dongsheng@623: dongsheng@623: =item msgstr dongsheng@623: dongsheng@623: the translation. dongsheng@623: dongsheng@623: =item reference dongsheng@623: dongsheng@623: an indication of where this string was found. Example: file.c:46 (meaning dongsheng@623: in 'file.c' at line 46). It can be a space-separated list in case of dongsheng@623: multiple occurrences. dongsheng@623: dongsheng@623: =item comment dongsheng@623: dongsheng@623: a comment added here manually (by the translators). The format here is free. dongsheng@623: dongsheng@623: =item automatic dongsheng@623: dongsheng@623: a comment which was automatically added by the string extraction dongsheng@623: program. See the I<--add-comments> option of the B program for dongsheng@623: more information. dongsheng@623: dongsheng@623: =item flags dongsheng@623: dongsheng@623: space-separated list of all defined flags for this entry. dongsheng@623: dongsheng@623: Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text, dongsheng@623: smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text, dongsheng@623: tcl-text, wrap, no-wrap and fuzzy. dongsheng@623: dongsheng@623: See the gettext documentation for their meaning. dongsheng@623: dongsheng@623: =item type dongsheng@623: dongsheng@623: This is mostly an internal argument: it is used while gettextizing dongsheng@623: documents. The idea here is to parse both the original and the translation dongsheng@623: into a po object, and merge them, using one's msgid as msgid and the dongsheng@623: other's msgid as msgstr. To make sure that things get ok, each msgid in po dongsheng@623: objects are given a type, based on their structure (like "chapt", "sect1", dongsheng@623: "p" and so on in docbook). If the types of strings are not the same, that dongsheng@623: means that both files do not share the same structure, and the process dongsheng@623: reports an error. dongsheng@623: dongsheng@623: This information is written as automatic comment in the po file since this dongsheng@623: gives to translators some context about the strings to translate. dongsheng@623: dongsheng@623: =item wrap dongsheng@623: dongsheng@623: boolean indicating whether whitespaces can be mangled in cosmetic dongsheng@623: reformattings. If true, the string is canonized before use. dongsheng@623: dongsheng@623: This information is written to the po file using the 'wrap' or 'no-wrap' flag. dongsheng@623: dongsheng@623: =item wrapcol dongsheng@623: dongsheng@623: The column at which we should wrap (default: 76). dongsheng@623: dongsheng@623: This information is not written to the po file. dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub push { dongsheng@623: my $self=shift; dongsheng@623: my %entry=@_; dongsheng@623: dongsheng@623: my $validoption="wrap wrapcol type msgid msgstr automatic flags reference"; dongsheng@623: my %validoption; dongsheng@623: dongsheng@623: map { $validoption{$_}=1 } (split(/ /,$validoption)); dongsheng@623: foreach (keys %entry) { dongsheng@623: Carp::confess "internal error: unknown arg $_.\n". dongsheng@623: "Here are the valid options: $validoption.\n" dongsheng@623: unless $validoption{$_}; dongsheng@623: } dongsheng@623: dongsheng@623: unless ($entry{'wrap'}) { dongsheng@623: $entry{'flags'} .= " no-wrap"; dongsheng@623: } dongsheng@623: if (defined ($entry{'msgid'})) { dongsheng@623: $entry{'msgid'} = canonize($entry{'msgid'}) dongsheng@623: if ($entry{'wrap'}); dongsheng@623: dongsheng@623: $entry{'msgid'} = escape_text($entry{'msgid'}); dongsheng@623: } dongsheng@623: if (defined ($entry{'msgstr'})) { dongsheng@623: $entry{'msgstr'} = canonize($entry{'msgstr'}) dongsheng@623: if ($entry{'wrap'}); dongsheng@623: dongsheng@623: $entry{'msgstr'} = escape_text($entry{'msgstr'}); dongsheng@623: } dongsheng@623: dongsheng@623: $self->push_raw(%entry); dongsheng@623: } dongsheng@623: dongsheng@623: # The same as push(), but assuming that msgid and msgstr are already escaped dongsheng@623: sub push_raw { dongsheng@623: my $self=shift; dongsheng@623: my %entry=@_; dongsheng@623: my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)= dongsheng@623: ($entry{'msgid'},$entry{'msgstr'}, dongsheng@623: $entry{'reference'},$entry{'comment'},$entry{'automatic'}, dongsheng@623: $entry{'flags'},$entry{'type'},$entry{'transref'}); dongsheng@623: my $keep_conflict = $entry{'conflict'}; dongsheng@623: dongsheng@623: # print STDERR "Push_raw\n"; dongsheng@623: # print STDERR " msgid=>>>$msgid<<<\n" if $msgid; dongsheng@623: # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr; dongsheng@623: # Carp::cluck " flags=$flags\n" if $flags; dongsheng@623: dongsheng@623: return unless defined($entry{'msgid'}); dongsheng@623: dongsheng@623: #no msgid => header definition dongsheng@623: unless (length($entry{'msgid'})) { dongsheng@623: # if (defined($self->{header}) && $self->{header} =~ /\S/) { dongsheng@623: # warn dgettext("po4a","Redefinition of the header. ". dongsheng@623: # "The old one will be discarded\n"); dongsheng@623: # } FIXME: do that iff the header isn't the default one. dongsheng@623: $self->{header}=$msgstr; dongsheng@623: $self->{header_comment}=$comment; dongsheng@623: my $charset = $self->get_charset; dongsheng@623: if ($charset ne "CHARSET") { dongsheng@623: $self->{encoder}=find_encoding($charset); dongsheng@623: } else { dongsheng@623: $self->{encoder}=find_encoding("ascii"); dongsheng@623: } dongsheng@623: return; dongsheng@623: } dongsheng@623: dongsheng@623: if ($self->{options}{'porefs'} eq "none") { dongsheng@623: $reference = ""; dongsheng@623: } elsif ($self->{options}{'porefs'} eq "noline") { dongsheng@623: $reference =~ s/:[0-9]*/:1/g; dongsheng@623: } dongsheng@623: dongsheng@623: if (defined($self->{po}{$msgid})) { dongsheng@623: warn wrap_mod("po4a::po", dongsheng@623: dgettext("po4a","msgid defined twice: %s"), dongsheng@623: $msgid) dongsheng@623: if (0); # FIXME: put a verbose stuff dongsheng@623: if ( defined $msgstr dongsheng@623: and defined $self->{po}{$msgid}{'msgstr'} dongsheng@623: and $self->{po}{$msgid}{'msgstr'} ne $msgstr) { dongsheng@623: my $txt=quote_text($msgid); dongsheng@623: my ($first,$second)= dongsheng@623: (format_comment(". ",$self->{po}{$msgid}{'reference'}). dongsheng@623: quote_text($self->{po}{$msgid}{'msgstr'}), dongsheng@623: dongsheng@623: format_comment(". ",$reference). dongsheng@623: quote_text($msgstr)); dongsheng@623: dongsheng@623: if ($keep_conflict) { dongsheng@623: if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) { dongsheng@623: $msgstr = $self->{po}{$msgid}{'msgstr'}. dongsheng@623: "\\n#-#-#-#-# $transref #-#-#-#-#\\n". dongsheng@623: $msgstr; dongsheng@623: } else { dongsheng@623: $msgstr = "#-#-#-#-# ". dongsheng@623: $self->{po}{$msgid}{'transref'}. dongsheng@623: " #-#-#-#-#\\n". dongsheng@623: $self->{po}{$msgid}{'msgstr'}."\\n". dongsheng@623: "#-#-#-#-# $transref #-#-#-#-#\\n". dongsheng@623: $msgstr; dongsheng@623: } dongsheng@623: # Every msgid will have the same list of references. dongsheng@623: # Only keep the last list. dongsheng@623: $self->{po}{$msgid}{'reference'} = ""; dongsheng@623: } else { dongsheng@623: warn wrap_msg(dgettext("po4a", dongsheng@623: "Translations don't match for:\n". dongsheng@623: "%s\n". dongsheng@623: "-->First translation:\n". dongsheng@623: "%s\n". dongsheng@623: " Second translation:\n". dongsheng@623: "%s\n". dongsheng@623: " Old translation discarded."), dongsheng@623: $txt,$first,$second); dongsheng@623: } dongsheng@623: } dongsheng@623: } dongsheng@623: if (defined $transref) { dongsheng@623: $self->{po}{$msgid}{'transref'} = $transref; dongsheng@623: } dongsheng@623: if (defined $reference) { dongsheng@623: if (defined $self->{po}{$msgid}{'reference'}) { dongsheng@623: $self->{po}{$msgid}{'reference'} .= " ".$reference; dongsheng@623: } else { dongsheng@623: $self->{po}{$msgid}{'reference'} = $reference; dongsheng@623: } dongsheng@623: } dongsheng@623: $self->{po}{$msgid}{'msgstr'} = $msgstr; dongsheng@623: $self->{po}{$msgid}{'comment'} = $comment; dongsheng@623: $self->{po}{$msgid}{'automatic'} = $automatic; dongsheng@623: if (defined($self->{po}{$msgid}{'pos_doc'})) { dongsheng@623: $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++; dongsheng@623: } else { dongsheng@623: $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++; dongsheng@623: } dongsheng@623: unless (defined($self->{po}{$msgid}{'pos'})) { dongsheng@623: $self->{po}{$msgid}{'pos'} = $self->{count}++; dongsheng@623: } dongsheng@623: $self->{po}{$msgid}{'type'} = $type; dongsheng@623: $self->{po}{$msgid}{'plural'} = $entry{'plural'} dongsheng@623: if defined $entry{'plural'}; dongsheng@623: dongsheng@623: if (defined($flags)) { dongsheng@623: $flags = " $flags "; dongsheng@623: $flags =~ s/,/ /g; dongsheng@623: foreach my $flag (@known_flags) { dongsheng@623: if ($flags =~ /\s$flag\s/) { # if flag to be set dongsheng@623: unless ( defined($self->{po}{$msgid}{'flags'}) dongsheng@623: && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) { dongsheng@623: # flag not already set dongsheng@623: if (defined $self->{po}{$msgid}{'flags'}) { dongsheng@623: $self->{po}{$msgid}{'flags'} .= " ".$flag; dongsheng@623: } else { dongsheng@623: $self->{po}{$msgid}{'flags'} = $flag; dongsheng@623: } dongsheng@623: } dongsheng@623: } dongsheng@623: } dongsheng@623: } dongsheng@623: # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n"; dongsheng@623: dongsheng@623: } dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =head1 Miscellaneous functions dongsheng@623: dongsheng@623: =over 4 dongsheng@623: dongsheng@623: =item count_entries() dongsheng@623: dongsheng@623: Returns the number of entries in the catalog (without the header). dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub count_entries($) { dongsheng@623: my $self=shift; dongsheng@623: return $self->{count}; dongsheng@623: } dongsheng@623: dongsheng@623: =item count_entries_doc() dongsheng@623: dongsheng@623: Returns the number of entries in document. If a string appears multiple times dongsheng@623: in the document, it will be counted multiple times dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub count_entries_doc($) { dongsheng@623: my $self=shift; dongsheng@623: return $self->{count_doc}; dongsheng@623: } dongsheng@623: dongsheng@623: =item msgid($) dongsheng@623: dongsheng@623: Returns the msgid of the given number. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub msgid($$) { dongsheng@623: my $self=shift; dongsheng@623: my $num=shift; dongsheng@623: dongsheng@623: foreach my $msgid ( keys %{$self->{po}} ) { dongsheng@623: return $msgid if ($self->{po}{$msgid}{'pos'} eq $num); dongsheng@623: } dongsheng@623: return undef; dongsheng@623: } dongsheng@623: dongsheng@623: =item msgid_doc($) dongsheng@623: dongsheng@623: Returns the msgid with the given position in the document. dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub msgid_doc($$) { dongsheng@623: my $self=shift; dongsheng@623: my $num=shift; dongsheng@623: dongsheng@623: foreach my $msgid ( keys %{$self->{po}} ) { dongsheng@623: foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) { dongsheng@623: return $msgid if ($pos eq $num); dongsheng@623: } dongsheng@623: } dongsheng@623: return undef; dongsheng@623: } dongsheng@623: dongsheng@623: =item get_charset() dongsheng@623: dongsheng@623: Returns the character set specified in the po header. If it hasn't been dongsheng@623: set, it will return "CHARSET". dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub get_charset() { dongsheng@623: my $self=shift; dongsheng@623: dongsheng@623: $self->{header} =~ /charset=(.*?)[\s\\]/; dongsheng@623: dongsheng@623: if (defined $1) { dongsheng@623: return $1; dongsheng@623: } else { dongsheng@623: return "CHARSET"; dongsheng@623: } dongsheng@623: } dongsheng@623: dongsheng@623: =item set_charset($) dongsheng@623: dongsheng@623: This sets the character set of the po header to the value specified in its dongsheng@623: first argument. If you never call this function (and no file with a specified dongsheng@623: character set is read), the default value is left to "CHARSET". This value dongsheng@623: doesn't change the behavior of this module, it's just used to fill that field dongsheng@623: in the header, and to return it in get_charset(). dongsheng@623: dongsheng@623: =cut dongsheng@623: dongsheng@623: sub set_charset() { dongsheng@623: my $self=shift; dongsheng@623: dongsheng@623: my ($newchar,$oldchar); dongsheng@623: $newchar = shift; dongsheng@623: $oldchar = $self->get_charset(); dongsheng@623: dongsheng@623: $self->{header} =~ s/$oldchar/$newchar/; dongsheng@623: $self->{encoder}=find_encoding($newchar); dongsheng@623: } dongsheng@623: dongsheng@623: #----[ helper functions ]--------------------------------------------------- dongsheng@623: dongsheng@623: # transforme the string from its po file representation to the form which dongsheng@623: # should be used to print it dongsheng@623: sub unescape_text { dongsheng@623: my $text = shift; dongsheng@623: dongsheng@623: print STDERR "\nunescape [$text]====" if $debug{'escape'}; dongsheng@623: $text = join("",split(/\n/,$text)); dongsheng@623: $text =~ s/\\"/"/g; dongsheng@623: # unescape newlines dongsheng@623: # NOTE on \G: dongsheng@623: # The following regular expression introduce newlines. dongsheng@623: # Thus, ^ doesn't match all beginnings of lines. dongsheng@623: # \G is a zero-width assertion that matches the position dongsheng@623: # of the previous substitution with s///g. As every dongsheng@623: # substitution ends by a newline, it always matches a dongsheng@623: # position just after a newline. dongsheng@623: $text =~ s/( # $1: dongsheng@623: (\G|[^\\]) # beginning of the line or any char dongsheng@623: # different from '\' dongsheng@623: (\\\\)* # followed by any even number of '\' dongsheng@623: )\\n # and followed by an escaped newline dongsheng@623: /$1\n/sgx; # single string, match globally, allow comments dongsheng@623: # unescape tabulations dongsheng@623: $text =~ s/( # $1: dongsheng@623: (\G|[^\\])# beginning of the line or any char dongsheng@623: # different from '\' dongsheng@623: (\\\\)* # followed by any even number of '\' dongsheng@623: )\\t # and followed by an escaped tabulation dongsheng@623: /$1\t/mgx; # multilines string, match globally, allow comments dongsheng@623: # and unescape the escape character dongsheng@623: $text =~ s/\\\\/\\/g; dongsheng@623: print STDERR ">$text<\n" if $debug{'escape'}; dongsheng@623: dongsheng@623: return $text; dongsheng@623: } dongsheng@623: dongsheng@623: # transform the string to its representation as it should be written in po dongsheng@623: # files dongsheng@623: sub escape_text { dongsheng@623: my $text = shift; dongsheng@623: dongsheng@623: print STDERR "\nescape [$text]====" if $debug{'escape'}; dongsheng@623: $text =~ s/\\/\\\\/g; dongsheng@623: $text =~ s/"/\\"/g; dongsheng@623: $text =~ s/\n/\\n/g; dongsheng@623: $text =~ s/\t/\\t/g; dongsheng@623: print STDERR ">$text<\n" if $debug{'escape'}; dongsheng@623: dongsheng@623: return $text; dongsheng@623: } dongsheng@623: dongsheng@623: # put quotes around the string on each lines (without escaping it) dongsheng@623: # It does also normalize the text (ie, make sure its representation is wraped dongsheng@623: # on the 80th char, but without changing the meaning of the string) dongsheng@623: sub quote_text { dongsheng@623: my $string = shift; dongsheng@623: dongsheng@623: return '""' unless defined($string) && length($string); dongsheng@623: dongsheng@623: print STDERR "\nquote [$string]====" if $debug{'quote'}; dongsheng@623: # break lines on newlines, if any dongsheng@623: # see unescape_text for an explanation on \G dongsheng@623: $string =~ s/( # $1: dongsheng@623: (\G|[^\\]) # beginning of the line or any char dongsheng@623: # different from '\' dongsheng@623: (\\\\)* # followed by any even number of '\' dongsheng@623: \\n) # and followed by an escaped newline dongsheng@623: /$1\n/sgx; # single string, match globally, allow comments dongsheng@623: $string = wrap($string); dongsheng@623: my @string = split(/\n/,$string); dongsheng@623: $string = join ("\"\n\"",@string); dongsheng@623: $string = "\"$string\""; dongsheng@623: if (scalar @string > 1 && $string[0] ne '') { dongsheng@623: $string = "\"\"\n".$string; dongsheng@623: } dongsheng@623: dongsheng@623: print STDERR ">$string<\n" if $debug{'quote'}; dongsheng@623: return $string; dongsheng@623: } dongsheng@623: dongsheng@623: # undo the work of the quote_text function dongsheng@623: sub unquote_text { dongsheng@623: my $string = shift; dongsheng@623: print STDERR "\nunquote [$string]====" if $debug{'quote'}; dongsheng@623: $string =~ s/^""\\n//s; dongsheng@623: $string =~ s/^"(.*)"$/$1/s; dongsheng@623: $string =~ s/"\n"//gm; dongsheng@623: # Note: an even number of '\' could precede \\n, but I could not build a dongsheng@623: # document to test this dongsheng@623: $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm; dongsheng@623: $string =~ s|!!DUMMYPOPM!!|\\n|gm; dongsheng@623: print STDERR ">$string<\n" if $debug{'quote'}; dongsheng@623: return $string; dongsheng@623: } dongsheng@623: dongsheng@623: # canonize the string: write it on only one line, changing consecutive dongsheng@623: # whitespace to only one space. dongsheng@623: # Warning, it changes the string and should only be called if the string is dongsheng@623: # plain text dongsheng@623: sub canonize { dongsheng@623: my $text=shift; dongsheng@623: print STDERR "\ncanonize [$text]====" if $debug{'canonize'}; dongsheng@623: $text =~ s/^ *//s; dongsheng@623: $text =~ s/^[ \t]+/ /gm; dongsheng@623: # if ($text eq "\n"), it messed up the first string (header) dongsheng@623: $text =~ s/\n/ /gm if ($text ne "\n"); dongsheng@623: $text =~ s/([.)]) +/$1 /gm; dongsheng@623: $text =~ s/([^.)]) */$1 /gm; dongsheng@623: $text =~ s/ *$//s; dongsheng@623: print STDERR ">$text<\n" if $debug{'canonize'}; dongsheng@623: return $text; dongsheng@623: } dongsheng@623: dongsheng@623: # wraps the string. We don't use Text::Wrap since it mangles whitespace at dongsheng@623: # the end of splited line dongsheng@623: sub wrap { dongsheng@623: my $text=shift; dongsheng@623: return "0" if ($text eq '0'); dongsheng@623: my $col=shift || 76; dongsheng@623: my @lines=split(/\n/,"$text"); dongsheng@623: my $res=""; dongsheng@623: my $first=1; dongsheng@623: while (defined(my $line=shift @lines)) { dongsheng@623: if ($first && length($line) > $col - 10) { dongsheng@623: unshift @lines,$line; dongsheng@623: $first=0; dongsheng@623: next; dongsheng@623: } dongsheng@623: if (length($line) > $col) { dongsheng@623: my $pos=rindex($line," ",$col); dongsheng@623: while (substr($line,$pos-1,1) eq '.' && $pos != -1) { dongsheng@623: $pos=rindex($line," ",$pos-1); dongsheng@623: } dongsheng@623: if ($pos == -1) { dongsheng@623: # There are no spaces in the first $col chars, pick-up the dongsheng@623: # first space dongsheng@623: $pos = index($line," "); dongsheng@623: } dongsheng@623: if ($pos != -1) { dongsheng@623: my $end=substr($line,$pos+1); dongsheng@623: $line=substr($line,0,$pos+1); dongsheng@623: if ($end =~ s/^( +)//) { dongsheng@623: $line .= $1; dongsheng@623: } dongsheng@623: unshift @lines,$end; dongsheng@623: } dongsheng@623: } dongsheng@623: $first=0; dongsheng@623: $res.="$line\n"; dongsheng@623: } dongsheng@623: # Restore the original trailing spaces dongsheng@623: $res =~ s/\s+$//s; dongsheng@623: if ($text =~ m/(\s+)$/s) { dongsheng@623: $res .= $1; dongsheng@623: } dongsheng@623: return $res; dongsheng@623: } dongsheng@623: dongsheng@623: # outputs properly a '# ... ' line to be put in the po file dongsheng@623: sub format_comment { dongsheng@623: my $comment=shift; dongsheng@623: my $char=shift; dongsheng@623: my $result = "#". $char . $comment; dongsheng@623: $result =~ s/\n/\n#$char/gs; dongsheng@623: $result =~ s/^#$char$/#/gm; dongsheng@623: $result .= "\n"; dongsheng@623: return $result; dongsheng@623: } dongsheng@623: dongsheng@623: dongsheng@623: 1; dongsheng@623: __END__ dongsheng@623: dongsheng@623: =back dongsheng@623: dongsheng@623: =head1 AUTHORS dongsheng@623: dongsheng@623: Denis Barbier dongsheng@623: Martin Quinson (mquinson#debian.org) dongsheng@623: dongsheng@623: =cut