rev |
line source |
dongsheng@623
|
1 #!/usr/bin/perl -w
|
dongsheng@623
|
2
|
dongsheng@623
|
3 require Exporter;
|
dongsheng@623
|
4
|
dongsheng@623
|
5 package Locale::Po4a::TransTractor;
|
dongsheng@623
|
6 use DynaLoader;
|
dongsheng@623
|
7
|
dongsheng@623
|
8 use 5.006;
|
dongsheng@623
|
9 use strict;
|
dongsheng@623
|
10 use warnings;
|
dongsheng@623
|
11
|
dongsheng@623
|
12 use subs qw(makespace);
|
dongsheng@623
|
13 use vars qw($VERSION @ISA @EXPORT);
|
dongsheng@623
|
14 $VERSION="0.36";
|
dongsheng@623
|
15 @ISA = qw(DynaLoader);
|
dongsheng@623
|
16 @EXPORT = qw(new process translate
|
dongsheng@623
|
17 read write readpo writepo
|
dongsheng@623
|
18 getpoout setpoout);
|
dongsheng@623
|
19
|
dongsheng@623
|
20 # Try to use a C extension if present.
|
dongsheng@623
|
21 eval("bootstrap Locale::Po4a::TransTractor $VERSION");
|
dongsheng@623
|
22
|
dongsheng@623
|
23 use Carp qw(croak);
|
dongsheng@623
|
24 use Locale::Po4a::Po;
|
dongsheng@623
|
25 use Locale::Po4a::Common;
|
dongsheng@623
|
26
|
dongsheng@623
|
27 use File::Path; # mkdir before write
|
dongsheng@623
|
28
|
dongsheng@623
|
29 use Encode;
|
dongsheng@623
|
30 use Encode::Guess;
|
dongsheng@623
|
31
|
dongsheng@623
|
32 =head1 NAME
|
dongsheng@623
|
33
|
dongsheng@623
|
34 Locale::Po4a::TransTractor - Generic trans(lator ex)tractor.
|
dongsheng@623
|
35
|
dongsheng@623
|
36 =head1 DESCRIPTION
|
dongsheng@623
|
37
|
dongsheng@623
|
38 The po4a (po for anything) project goal is to ease translations (and more
|
dongsheng@623
|
39 interestingly, the maintenance of translations) using gettext tools on
|
dongsheng@623
|
40 areas where they were not expected like documentation.
|
dongsheng@623
|
41
|
dongsheng@623
|
42 This class is the ancestor of every po4a parser used to parse a document to
|
dongsheng@623
|
43 search translatable strings, extract them to a po file and replace them by
|
dongsheng@623
|
44 their translation in the output document.
|
dongsheng@623
|
45
|
dongsheng@623
|
46 More formally, it takes the following arguments as input:
|
dongsheng@623
|
47
|
dongsheng@623
|
48 =over 2
|
dongsheng@623
|
49
|
dongsheng@623
|
50 =item -
|
dongsheng@623
|
51
|
dongsheng@623
|
52 a document to translate ;
|
dongsheng@623
|
53
|
dongsheng@623
|
54 =item -
|
dongsheng@623
|
55
|
dongsheng@623
|
56 a po file containing the translations to use.
|
dongsheng@623
|
57
|
dongsheng@623
|
58 =back
|
dongsheng@623
|
59
|
dongsheng@623
|
60 As output, it produces:
|
dongsheng@623
|
61
|
dongsheng@623
|
62 =over 2
|
dongsheng@623
|
63
|
dongsheng@623
|
64 =item -
|
dongsheng@623
|
65
|
dongsheng@623
|
66 another po file, resulting of the extraction of translatable strings from
|
dongsheng@623
|
67 the input document ;
|
dongsheng@623
|
68
|
dongsheng@623
|
69 =item -
|
dongsheng@623
|
70
|
dongsheng@623
|
71 a translated document, with the same structure as the one in input, but
|
dongsheng@623
|
72 with all translatable strings replaced with the translations found in the
|
dongsheng@623
|
73 po file provided in input.
|
dongsheng@623
|
74
|
dongsheng@623
|
75 =back
|
dongsheng@623
|
76
|
dongsheng@623
|
77 Here is a graphical representation of this:
|
dongsheng@623
|
78
|
dongsheng@623
|
79 Input document --\ /---> Output document
|
dongsheng@623
|
80 \ / (translated)
|
dongsheng@623
|
81 +-> parse() function -----+
|
dongsheng@623
|
82 / \
|
dongsheng@623
|
83 Input po --------/ \---> Output po
|
dongsheng@623
|
84 (extracted)
|
dongsheng@623
|
85
|
dongsheng@623
|
86 =head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE
|
dongsheng@623
|
87
|
dongsheng@623
|
88 =over 4
|
dongsheng@623
|
89
|
dongsheng@623
|
90 =item parse()
|
dongsheng@623
|
91
|
dongsheng@623
|
92 This is where all the work takes place: the parsing of input documents, the
|
dongsheng@623
|
93 generation of output, and the extraction of the translatable strings. This
|
dongsheng@623
|
94 is pretty simple using the provided functions presented in the section
|
dongsheng@623
|
95 "INTERNAL FUNCTIONS" below. See also the synopsis, which present an
|
dongsheng@623
|
96 example.
|
dongsheng@623
|
97
|
dongsheng@623
|
98 This function is called by the process() function below, but if you choose
|
dongsheng@623
|
99 to use the new() function, and to add content manually to your document,
|
dongsheng@623
|
100 you will have to call this function yourself.
|
dongsheng@623
|
101
|
dongsheng@623
|
102 =item docheader()
|
dongsheng@623
|
103
|
dongsheng@623
|
104 This function returns the header we should add to the produced document,
|
dongsheng@623
|
105 quoted properly to be a comment in the target language. See the section
|
dongsheng@623
|
106 "Educating developers about translations", from L<po4a(7)|po4a.7>, for what
|
dongsheng@623
|
107 it is good for.
|
dongsheng@623
|
108
|
dongsheng@623
|
109 =back
|
dongsheng@623
|
110
|
dongsheng@623
|
111 =cut
|
dongsheng@623
|
112
|
dongsheng@623
|
# Default header hook: contributes nothing to the written document.
# Returns an empty list (undef in scalar context).
sub docheader {
    return;
}
|
dongsheng@623
|
114
|
dongsheng@623
|
# Default parse hook: does nothing.
# Returns an empty list (undef in scalar context).
sub parse {
    return;
}
|
dongsheng@623
|
116
|
dongsheng@623
|
117 =head1 SYNOPSIS
|
dongsheng@623
|
118
|
dongsheng@623
|
119 The following example parses a list of paragraphs beginning with "<p>". For the sake
|
dongsheng@623
|
120 of simplicity, we assume that the document is well formatted, i.e. that '<p>'
|
dongsheng@623
|
121 tags are the only tags present, and that this tag is at the very beginning
|
dongsheng@623
|
122 of each paragraph.
|
dongsheng@623
|
123
|
dongsheng@623
|
124 sub parse {
|
dongsheng@623
|
125 my $self = shift;
|
dongsheng@623
|
126
|
dongsheng@623
|
127 PARAGRAPH: while (1) {
|
dongsheng@623
|
128 my ($paragraph,$pararef)=("","");
|
dongsheng@623
|
129 my $first=1;
|
dongsheng@623
|
130 my ($line,$lref)=$self->shiftline();
|
dongsheng@623
|
131 while (defined($line)) {
|
dongsheng@623
|
132 if ($line =~ m/<p>/ && !$first--) {
|
dongsheng@623
|
133 # Not the first time we see <p>.
|
dongsheng@623
|
134 # Reput the current line in input,
|
dongsheng@623
|
135 # and put the built paragraph to output
|
dongsheng@623
|
136 $self->unshiftline($line,$lref);
|
dongsheng@623
|
137
|
dongsheng@623
|
138 # Now that the document is formed, translate it:
|
dongsheng@623
|
139 # - Remove the leading tag
|
dongsheng@623
|
140 $paragraph =~ s/^<p>//s;
|
dongsheng@623
|
141
|
dongsheng@623
|
142 # - push to output the leading tag (untranslated) and the
|
dongsheng@623
|
143 # rest of the paragraph (translated)
|
dongsheng@623
|
144 $self->pushline( "<p>"
|
dongsheng@623
|
145 . $self->translate($paragraph,$pararef)
|
dongsheng@623
|
146 );
|
dongsheng@623
|
147
|
dongsheng@623
|
148 next PARAGRAPH;
|
dongsheng@623
|
149 } else {
|
dongsheng@623
|
150 # Append to the paragraph
|
dongsheng@623
|
151 $paragraph .= $line;
|
dongsheng@623
|
152 $pararef = $lref unless(length($pararef));
|
dongsheng@623
|
153 }
|
dongsheng@623
|
154
|
dongsheng@623
|
155 # Reinit the loop
|
dongsheng@623
|
156 ($line,$lref)=$self->shiftline();
|
dongsheng@623
|
157 }
|
dongsheng@623
|
158 # Did not get a defined line? End of input file.
|
dongsheng@623
|
159 return;
|
dongsheng@623
|
160 }
|
dongsheng@623
|
161 }
|
dongsheng@623
|
162
|
dongsheng@623
|
163 Once you've implemented the parse function, you can use your document
|
dongsheng@623
|
164 class, using the public interface presented in the next section.
|
dongsheng@623
|
165
|
dongsheng@623
|
166 =head1 PUBLIC INTERFACE for scripts using your parser
|
dongsheng@623
|
167
|
dongsheng@623
|
168 =head2 Constructor
|
dongsheng@623
|
169
|
dongsheng@623
|
170 =over 4
|
dongsheng@623
|
171
|
dongsheng@623
|
172 =item process(%)
|
dongsheng@623
|
173
|
dongsheng@623
|
174 This function can do all you need to do with a po4a document in one
|
dongsheng@623
|
175 invocation. Its arguments must be packed as a hash. ACTIONS:
|
dongsheng@623
|
176
|
dongsheng@623
|
177 =over 3
|
dongsheng@623
|
178
|
dongsheng@623
|
179 =item a.
|
dongsheng@623
|
180
|
dongsheng@623
|
181 Reads all the po files specified in po_in_name
|
dongsheng@623
|
182
|
dongsheng@623
|
183 =item b.
|
dongsheng@623
|
184
|
dongsheng@623
|
185 Reads all original documents specified in file_in_name
|
dongsheng@623
|
186
|
dongsheng@623
|
187 =item c.
|
dongsheng@623
|
188
|
dongsheng@623
|
189 Parses the document
|
dongsheng@623
|
190
|
dongsheng@623
|
191 =item d.
|
dongsheng@623
|
192
|
dongsheng@623
|
193 Reads and applies all the addenda specified
|
dongsheng@623
|
194
|
dongsheng@623
|
195 =item e.
|
dongsheng@623
|
196
|
dongsheng@623
|
197 Writes the translated document to file_out_name (if given)
|
dongsheng@623
|
198
|
dongsheng@623
|
199 =item f.
|
dongsheng@623
|
200
|
dongsheng@623
|
201 Writes the extracted po file to po_out_name (if given)
|
dongsheng@623
|
202
|
dongsheng@623
|
203 =back
|
dongsheng@623
|
204
|
dongsheng@623
|
205 ARGUMENTS, beside the ones accepted by new() (with expected type):
|
dongsheng@623
|
206
|
dongsheng@623
|
207 =over 4
|
dongsheng@623
|
208
|
dongsheng@623
|
209 =item file_in_name (@)
|
dongsheng@623
|
210
|
dongsheng@623
|
211 List of filenames where we should read the input document.
|
dongsheng@623
|
212
|
dongsheng@623
|
213 =item file_in_charset ($)
|
dongsheng@623
|
214
|
dongsheng@623
|
215 Charset used in the input document (if it isn't specified, it will try
|
dongsheng@623
|
216 to detect it from the input document).
|
dongsheng@623
|
217
|
dongsheng@623
|
218 =item file_out_name ($)
|
dongsheng@623
|
219
|
dongsheng@623
|
220 Filename where we should write the output document.
|
dongsheng@623
|
221
|
dongsheng@623
|
222 =item file_out_charset ($)
|
dongsheng@623
|
223
|
dongsheng@623
|
224 Charset used in the output document (if it isn't specified, it will use
|
dongsheng@623
|
225 the po file charset).
|
dongsheng@623
|
226
|
dongsheng@623
|
227 =item po_in_name (@)
|
dongsheng@623
|
228
|
dongsheng@623
|
229 List of filenames where we should read the input po files from, containing
|
dongsheng@623
|
230 the translation which will be used to translate the document.
|
dongsheng@623
|
231
|
dongsheng@623
|
232 =item po_out_name ($)
|
dongsheng@623
|
233
|
dongsheng@623
|
234 Filename where we should write the output po file, containing the strings
|
dongsheng@623
|
235 extracted from the input document.
|
dongsheng@623
|
236
|
dongsheng@623
|
237 =item addendum (@)
|
dongsheng@623
|
238
|
dongsheng@623
|
239 List of filenames where we should read the addenda from.
|
dongsheng@623
|
240
|
dongsheng@623
|
241 =item addendum_charset ($)
|
dongsheng@623
|
242
|
dongsheng@623
|
243 Charset for the addenda.
|
dongsheng@623
|
244
|
dongsheng@623
|
245 =back
|
dongsheng@623
|
246
|
dongsheng@623
|
247 =item new(%)
|
dongsheng@623
|
248
|
dongsheng@623
|
249 Create a new Po4a document. Accepted options (must be in a hash):
|
dongsheng@623
|
250
|
dongsheng@623
|
251 =over 4
|
dongsheng@623
|
252
|
dongsheng@623
|
253 =item verbose ($)
|
dongsheng@623
|
254
|
dongsheng@623
|
255 Sets the verbosity.
|
dongsheng@623
|
256
|
dongsheng@623
|
257 =item debug ($)
|
dongsheng@623
|
258
|
dongsheng@623
|
259 Sets the debugging.
|
dongsheng@623
|
260
|
dongsheng@623
|
261 =back
|
dongsheng@623
|
262
|
dongsheng@623
|
263 =cut
|
dongsheng@623
|
264
|
dongsheng@623
|
# Run the whole chain on an existing document object, in this order:
# read the input po files, read the input documents, parse, apply the
# addenda, write the translated document and write the extracted po file.
#
# Must be invoked on an object: it stores settings in $self->{TT} and
# calls methods on $self.
#
# Arguments are packed as a hash:
#   po_in_name, file_in_name, addendum  - array references of filenames
#   file_out_name, po_out_name          - filenames (step skipped if undef)
#   file_in_charset, file_out_charset,
#   addendum_charset                    - charset names
#
# Returns the object.  Dies with "An addendum failed" when addendum()
# returns a false value.
sub process {
    my $self = shift;
    my %params = @_;

    $self->detected_charset($params{'file_in_charset'});
    $self->{TT}{'file_out_charset'}=$params{'file_out_charset'};
    # Only look up an encoder when an output charset was really given.
    if (defined($self->{TT}{'file_out_charset'}) and
        length($self->{TT}{'file_out_charset'})) {
        $self->{TT}{'file_out_encoder'} = find_encoding($self->{TT}{'file_out_charset'});
    }
    $self->{TT}{'addendum_charset'}=$params{'addendum_charset'};

    foreach my $file (@{$params{'po_in_name'}}) {
        print STDERR "readpo($file)... " if $self->debug();
        $self->readpo($file);
        print STDERR "done.\n" if $self->debug();
    }
    foreach my $file (@{$params{'file_in_name'}}) {
        print STDERR "read($file)..." if $self->debug();
        $self->read($file);
        print STDERR "done.\n" if $self->debug();
    }
    print STDERR "parse..." if $self->debug();
    $self->parse();
    print STDERR "done.\n" if $self->debug();
    foreach my $file (@{$params{'addendum'}}) {
        print STDERR "addendum($file)..." if $self->debug();
        $self->addendum($file) || die "An addendum failed\n";
        print STDERR "done.\n" if $self->debug();
    }
    if (defined $params{'file_out_name'}) {
        print STDERR "write(".$params{'file_out_name'}.")... "
            if $self->debug();
        $self->write($params{'file_out_name'});
        print STDERR "done.\n" if $self->debug();
    }
    if (defined $params{'po_out_name'}) {
        print STDERR "writepo(".$params{'po_out_name'}.")... "
            if $self->debug();
        $self->writepo($params{'po_out_name'});
        print STDERR "done.\n" if $self->debug();
    }
    return $self;
}
|
dongsheng@623
|
327
|
dongsheng@623
|
# Constructor.  May be called on a class name or on an existing object, in
# which case the new object is blessed into the class of that object.
#
# Options (packed as a hash) are handed to initialize(); 'verbose' and
# 'debug' are additionally stored in the private data when defined.
#
# Returns the new object.
sub new {
    my ($invocant, %options) = @_;
    my $class = ref($invocant) || $invocant;
    my $self  = bless {}, $class;

    # Pre-seed 'porefs' so that the plugin does not croak on an option that
    # is really intended for Po.pm, then let the plugin parse the options.
    $self->{options}{'porefs'} = '';
    $self->initialize(%options);

    # Options forwarded to the output po file, read back after
    # initialize() has run.
    my %po_options = ('porefs' => $self->{options}{'porefs'});

    my $po_in  = Locale::Po4a::Po->new();
    my $po_out = Locale::Po4a::Po->new(\%po_options);

    # Private data.
    #  - doc_in starts undefined; read() appends to it a flat list that
    #    alternates line content and line reference.
    #  - doc_out starts undefined as well.
    #  - ascii_input: the input document is considered ascii until read()
    #    proves the opposite.
    #  - utf_mode: utf is not used unless forced from the outside (in case
    #    the document isn't in ascii).
    $self->{TT} = {
        po_in       => $po_in,
        po_out      => $po_out,
        doc_in      => undef,
        doc_out     => undef,
        ascii_input => 1,
        utf_mode    => 0,
    };
    $self->{TT}{verbose} = $options{'verbose'} if defined $options{'verbose'};
    $self->{TT}{debug}   = $options{'debug'}   if defined $options{'debug'};

    return $self;
}
|
dongsheng@623
|
371
|
dongsheng@623
|
372 =back
|
dongsheng@623
|
373
|
dongsheng@623
|
374 =head2 Manipulating document files
|
dongsheng@623
|
375
|
dongsheng@623
|
376 =over 4
|
dongsheng@623
|
377
|
dongsheng@623
|
378 =item read($)
|
dongsheng@623
|
379
|
dongsheng@623
|
380 Add another input document at the end of the existing one. The argument is
|
dongsheng@623
|
381 the filename to read.
|
dongsheng@623
|
382
|
dongsheng@623
|
383 Please note that it does not parse anything. You should use the parse()
|
dongsheng@623
|
384 function when you're done with packing input files into the document.
|
dongsheng@623
|
385
|
dongsheng@623
|
386 =cut
|
dongsheng@623
|
387
|
dongsheng@623
|
388 #'
|
dongsheng@623
|
# Append the content of a file to the input document.
#
# Argument: the name of the file to read.
#
# Every line is appended to $self->{TT}{doc_in} followed by its reference
# ("filename:linenumber"), i.e. doc_in is a flat list alternating content
# and reference.  While no 'file_in_charset' is recorded in the private
# data and the input is still considered ascii, each line is passed to
# guess_encoding(); the first line that is not recognised as ascii clears
# the ascii_input flag and has its reference saved in non_ascii_ref.
#
# Croaks when no filename is given or when the file cannot be opened or
# closed.  Nothing is parsed here.
sub read {
    my $self=shift;
    my $filename=shift
        or croak wrap_msg(dgettext("po4a", "Can't read from file without having a filename"));
    my $linenum=0;

    # A lexical handle is used instead of a package-wide bareword, so the
    # file is released even if we croak in the middle of the loop.
    # NOTE(review): the two-argument form of open is kept on purpose so
    # that filenames are interpreted exactly as before.
    open my $input,"<$filename"
        or croak wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);
    while (defined (my $textline = <$input>)) {
        $linenum++;
        my $ref="$filename:$linenum";
        my @entry=($textline,$ref);
        push @{$self->{TT}{doc_in}}, @entry;

        if (!defined($self->{TT}{'file_in_charset'})) {
            # Detect if this file has non-ascii characters
            if($self->{TT}{ascii_input}) {
                my $decoder = guess_encoding($textline);
                if (!ref($decoder) or $decoder !~ /Encode::XS=/) {
                    # We have detected a non-ascii line
                    $self->{TT}{ascii_input} = 0;
                    # Save the reference for future error message
                    $self->{TT}{non_ascii_ref} ||= $ref;
                }
            }
        }
    }
    close $input
        or croak wrap_msg(dgettext("po4a", "Can't close %s after reading: %s"), $filename, $!);

}
|
dongsheng@623
|
420
|
dongsheng@623
|
421 =item write($)
|
dongsheng@623
|
422
|
dongsheng@623
|
423 Write the translated document to the given filename.
|
dongsheng@623
|
424
|
dongsheng@623
|
425 =cut
|
dongsheng@623
|
426
|
dongsheng@623
|
# Write the translated document.
#
# Argument: the output filename; '-' selects STDOUT.
#
# For a real file whose name contains a '/', the leading directory part is
# created first when it does not exist yet.  The lines returned by
# docheader() are printed first, followed by every element of
# $self->{TT}{doc_out}.  The handle is closed afterwards unless it is
# STDOUT.
#
# Croaks when no filename is given or when the file cannot be opened or
# closed.
sub write {
    my ($self, $filename) = @_;
    croak wrap_msg(dgettext("po4a", "Can't write to a file without filename"))
        unless $filename;

    my $fh;
    if ($filename ne '-') {
        # make sure the directory in which we should write the localized
        # file exists
        if ($filename =~ m|/|) {
            (my $dir = $filename) =~ s|/[^/]*$||;

            File::Path::mkpath($dir, 0, 0755) # Croaks on error
                if (length ($dir) && ! -e $dir);
        }
        open $fh,">$filename"
            or croak wrap_msg(dgettext("po4a", "Can't write to %s: %s"), $filename, $!);
    } else {
        $fh=\*STDOUT;
    }

    foreach ($self->docheader()) {
        print $fh $_;
    }
    foreach (@{$self->{TT}{doc_out}}) {
        print $fh $_;
    }

    if ($filename ne '-') {
        close $fh or croak wrap_msg(dgettext("po4a", "Can't close %s after writing: %s"), $filename, $!);
    }

}
|
dongsheng@623
|
456
|
dongsheng@623
|
457 =back
|
dongsheng@623
|
458
|
dongsheng@623
|
459 =head2 Manipulating po files
|
dongsheng@623
|
460
|
dongsheng@623
|
461 =over 4
|
dongsheng@623
|
462
|
dongsheng@623
|
463 =item readpo($)
|
dongsheng@623
|
464
|
dongsheng@623
|
465 Add the content of a file (whose name is passed as argument) to the
|
dongsheng@623
|
466 existing input po. The old content is not discarded.
|
dongsheng@623
|
467
|
dongsheng@623
|
468 =item writepo($)
|
dongsheng@623
|
469
|
dongsheng@623
|
470 Write the extracted po file to the given filename.
|
dongsheng@623
|
471
|
dongsheng@623
|
472 =item stats()
|
dongsheng@623
|
473
|
dongsheng@623
|
474 Returns some statistics about the translation done so far. Please note that
|
dongsheng@623
|
475 these are not the same statistics as the ones printed by msgfmt
|
dongsheng@623
|
476 --statistic. Here, it's stats about recent usage of the po file, while
|
dongsheng@623
|
477 msgfmt reports the status of the file. It is a wrapper to the
|
dongsheng@623
|
478 Locale::Po4a::Po::stats_get function applied to the input po file. Example
|
dongsheng@623
|
479 of use:
|
dongsheng@623
|
480
|
dongsheng@623
|
481 [normal use of the po4a document...]
|
dongsheng@623
|
482
|
dongsheng@623
|
483 ($percent,$hit,$queries) = $document->stats();
|
dongsheng@623
|
484 print "We found translations for $percent\% ($hit from $queries) of strings.\n";
|
dongsheng@623
|
485
|
dongsheng@623
|
486 =back
|
dongsheng@623
|
487
|
dongsheng@623
|
488 =cut
|
dongsheng@623
|
489
|
dongsheng@623
|
# Accessor: returns the output po object stored in the private data.
sub getpoout {
    my ($self) = @_;
    return $self->{TT}{po_out};
}
|
dongsheng@623
|
# Mutator: replaces the output po object stored in the private data with
# the given value, which is also the value of the assignment returned.
sub setpoout {
    my ($self, $po) = @_;
    $self->{TT}{po_out} = $po;
}
|
dongsheng@623
|
# Forward the given filename to the read() method of the input po object,
# and hand back whatever that method returns.
sub readpo {
    my ($self, $filename) = @_;
    return $self->{TT}{po_in}->read($filename);
}
|
dongsheng@623
|
# Forward the given filename to the write() method of the output po
# object, and hand back whatever that method returns.
sub writepo {
    my ($self, $filename) = @_;
    return $self->{TT}{po_out}->write($filename);
}
|
dongsheng@623
|
# Wrapper around the stats_get() method of the input po object; returns
# whatever that method returns.
sub stats {
    my ($self) = @_;
    my $po_in = $self->{TT}{po_in};
    return $po_in->stats_get();
}
|
dongsheng@623
|
505
|
dongsheng@623
|
506 =head2 Manipulating addenda
|
dongsheng@623
|
507
|
dongsheng@623
|
508 =over 4
|
dongsheng@623
|
509
|
dongsheng@623
|
510 =item addendum($)
|
dongsheng@623
|
511
|
dongsheng@623
|
512 Please refer to L<po4a(7)|po4a.7> for more information on what addenda are,
|
dongsheng@623
|
513 and how translators should write them. To apply an addendum to the translated
|
dongsheng@623
|
514 document, simply pass its filename to this function and you are done ;)
|
dongsheng@623
|
515
|
dongsheng@623
|
516 This function returns a non-null integer on success, and 0 on error.
|
dongsheng@623
|
517
|
dongsheng@623
|
518 =cut
|
dongsheng@623
|
519
|
dongsheng@623
|
520 # Internal function to read the header.
|
dongsheng@623
|
# Read an addendum file: parse its first line as a Po4a header and slurp
# the remaining lines as the addendum content.
#
# Argument: the name of the addendum file.
#
# The header must contain "PO4A-HEADER:" followed by ';'-separated
# key=value pairs.  Recognised keys (case-insensitive): mode, position,
# endboundary (boundary mode 'after') and beginboundary (boundary mode
# 'before').  The mode must be 'before' or 'after', a position is
# mandatory, and a boundary is mandatory unless the mode is 'before'.
#
# Returns the list ($errcode, $mode, $position, $boundary, $bmode,
# $content).  $errcode is 0 on success and 1 on failure; on failure a
# warning has been emitted and the other values hold whatever was parsed
# up to that point.
sub addendum_parse {
    my $filename = shift;
    my $header;

    my ($errcode,$mode,$position,$boundary,$bmode,$content)=
        (1,"","","","","");

    my $ins;
    unless (open($ins, '<', $filename)) {
        warn wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);
        return ($errcode,$mode,$position,$boundary,$bmode,$content);
    }

    # Every failure leaves this block early, so that the file handle is
    # closed on all paths before returning.
  PARSE_ADDFILE: {
        unless (defined ($header=<$ins>) && $header) {
            warn wrap_msg(dgettext("po4a", "Can't read Po4a header from %s."), $filename);
            last PARSE_ADDFILE;
        }

        unless ($header =~ s/PO4A-HEADER://i) {
            warn wrap_msg(dgettext("po4a", "First line of %s does not look like a Po4a header."), $filename);
            last PARSE_ADDFILE;
        }
        foreach my $part (split(/;/,$header)) {
            unless ($part =~ m/^\s*([^=]*)=(.*)$/) {
                warn wrap_msg(dgettext("po4a", "Syntax error in Po4a header of %s, near \"%s\""), $filename, $part);
                last PARSE_ADDFILE;
            }
            my ($key,$value)=($1,$2);
            $key=lc($key);
            if ($key eq 'mode') {
                $mode=lc($value);
            } elsif ($key eq 'position') {
                $position=$value;
            } elsif ($key eq 'endboundary') {
                $boundary=$value;
                $bmode='after';
            } elsif ($key eq 'beginboundary') {
                $boundary=$value;
                $bmode='before';
            } else {
                warn wrap_msg(dgettext("po4a", "Invalid argument in the Po4a header of %s: %s"), $filename, $key);
                last PARSE_ADDFILE;
            }
        }

        unless (length($mode)) {
            warn wrap_msg(dgettext("po4a", "The Po4a header of %s does not define the mode."), $filename);
            last PARSE_ADDFILE;
        }
        unless ($mode eq "before" || $mode eq "after") {
            warn wrap_msg(dgettext("po4a", "Mode invalid in the Po4a header of %s: should be 'before' or 'after' not %s."), $filename, $mode);
            last PARSE_ADDFILE;
        }

        unless (length($position)) {
            warn wrap_msg(dgettext("po4a", "The Po4a header of %s does not define the position."), $filename);
            last PARSE_ADDFILE;
        }
        unless ($mode eq "before" || length($boundary)) {
            warn wrap_msg(dgettext("po4a", "No ending boundary given in the Po4a header, but mode=after."));
            last PARSE_ADDFILE;
        }

        # Everything after the header line is the addendum itself.
        while (defined(my $line = <$ins>)) {
            $content .= $line;
        }

        $errcode=0;
    }

    close $ins;
    return ($errcode,$mode,$position,$boundary,$bmode,$content);
}
|
dongsheng@623
|
589
|
dongsheng@623
|
# Return a chomp()ed copy of the given string; the caller's own value is
# not modified.
sub mychomp {
    my $text = $_[0];
    chomp $text;
    return $text;
}
|
dongsheng@623
|
595
|
dongsheng@623
|
# Apply an addendum to the translated document ($self->{TT}{doc_out}).
#
# Argument: the name of the addendum file, whose header is decoded by
# addendum_parse().
#
# The content is recoded from the addendum charset to the output charset
# when an addendum charset is set.  The position (used as a regular
# expression) must match exactly one line of the output document.
#  - mode "before": the content is inserted just before the matching line.
#  - otherwise: starting after the matching line, the first line matching
#    the boundary (also a regular expression) is looked for, and the
#    content is inserted before or after it depending on the boundary
#    mode; when no boundary is found the content goes to the end of the
#    document.
#
# Returns 1 on success and 0 on error (after emitting a warning).  Dies
# when the file does not exist.
sub addendum {
    my ($self,$filename) = @_;

    print STDERR "Apply addendum $filename..." if $self->debug();
    unless ($filename) {
        warn wrap_msg(dgettext("po4a",
            "Can't apply addendum when not given the filename"));
        return 0;
    }
    die wrap_msg(dgettext("po4a", "Addendum %s does not exist."), $filename)
        unless -e $filename;

    my ($errcode,$mode,$position,$boundary,$bmode,$content)=
        addendum_parse($filename);
    return 0 if ($errcode);

    print STDERR "mode=$mode;pos=$position;bound=$boundary;bmode=$bmode;ctn=$content\n"
        if $self->debug();

    # We only recode the addendum if an origin charset is specified, else we
    # suppose it's already in the output document's charset
    if (defined($self->{TT}{'addendum_charset'}) &&
        length($self->{TT}{'addendum_charset'})) {
        Encode::from_to($content,$self->{TT}{'addendum_charset'},
            $self->get_out_charset);
    }

    my $found = scalar grep { /$position/ } @{$self->{TT}{doc_out}};
    if ($found == 0) {
        warn wrap_msg(dgettext("po4a",
            "No candidate position for the addendum %s."), $filename);
        return 0;
    }
    if ($found > 1) {
        warn wrap_msg(dgettext("po4a",
            "More than one candidate position found for the addendum %s."), $filename);
        return 0;
    }

    if ($mode eq "before") {
        if ($self->verbose() > 1 || $self->debug() ) {
            foreach my $docline (@{$self->{TT}{doc_out}}) {
                print STDERR wrap_msg(dgettext("po4a", "Addendum '%s' applied before this line: %s"), $filename, $docline)
                    if ($docline =~ /$position/);
            }
        }
        @{$self->{TT}{doc_out}} = map { /$position/ ? ($content,$_) : $_
                                      } @{$self->{TT}{doc_out}};
    } else {
        my @newres=();

        do {
            # make sure it doesnt whine on empty document
            my $line = scalar @{$self->{TT}{doc_out}} ? shift @{$self->{TT}{doc_out}} : "";
            push @newres,$line;
            my $outline=mychomp($line);
            $outline =~ s/^[ \t]*//;

            if ($line =~ m/$position/) {
                # Test definedness, not truth: a line such as "0" or ""
                # is regular content and must not be taken for the
                # boundary.
                while (defined($line=shift @{$self->{TT}{doc_out}})) {
                    last if ($line=~/$boundary/);
                    push @newres,$line;
                }
                # NOTE(review): the messages below go to STDOUT whereas
                # the "before" mode above uses STDERR -- left unchanged,
                # confirm which one is intended.
                if (defined $line) {
                    if ($bmode eq 'before') {
                        print wrap_msg(dgettext("po4a",
                            "Addendum '%s' applied before this line: %s"),
                            $filename, $outline)
                            if ($self->verbose() > 1 || $self->debug());
                        push @newres,$content;
                        push @newres,$line;
                    } else {
                        print wrap_msg(dgettext("po4a",
                            "Addendum '%s' applied after the line: %s."),
                            $filename, $outline)
                            if ($self->verbose() > 1 || $self->debug());
                        push @newres,$line;
                        push @newres,$content;
                    }
                } else {
                    print wrap_msg(dgettext("po4a", "Addendum '%s' applied at the end of the file."), $filename)
                        if ($self->verbose() > 1 || $self->debug());
                    push @newres,$content;
                }
            }
        } while (scalar @{$self->{TT}{doc_out}});
        @{$self->{TT}{doc_out}} = @newres;
    }
    print STDERR "done.\n" if $self->debug();
    return 1;
}
|
dongsheng@623
|
685
|
dongsheng@623
|
686 =back
|
dongsheng@623
|
687
|
dongsheng@623
|
688 =head1 INTERNAL FUNCTIONS used to write derivated parsers
|
dongsheng@623
|
689
|
dongsheng@623
|
690 =head2 Getting input, providing output
|
dongsheng@623
|
691
|
dongsheng@623
|
692 Four functions are provided to get input and return output. They are very
|
dongsheng@623
|
693 similar to shift/unshift and push/pop. The first pair is about input, while
|
dongsheng@623
|
694 the second is about output. Mnemonic: in input, you are interested in the
|
dongsheng@623
|
695 first line, what shift gives, and in output you want to add your result at
|
dongsheng@623
|
696 the end, like push does.
|
dongsheng@623
|
697
|
dongsheng@623
|
698 =over 4
|
dongsheng@623
|
699
|
dongsheng@623
|
700 =item shiftline()
|
dongsheng@623
|
701
|
dongsheng@623
|
702 This function returns the next line of the doc_in to be parsed and its
|
dongsheng@623
|
703 reference (packed as an array).
|
dongsheng@623
|
704
|
dongsheng@623
|
705 =item unshiftline($$)
|
dongsheng@623
|
706
|
dongsheng@623
|
707 Unshifts a line of the input document and its reference.
|
dongsheng@623
|
708
|
dongsheng@623
|
709 =item pushline($)
|
dongsheng@623
|
710
|
dongsheng@623
|
711 Push a new line to the doc_out.
|
dongsheng@623
|
712
|
dongsheng@623
|
713 =item popline()
|
dongsheng@623
|
714
|
dongsheng@623
|
715 Pop the last pushed line from the doc_out.
|
dongsheng@623
|
716
|
dongsheng@623
|
717 =back
|
dongsheng@623
|
718
|
dongsheng@623
|
719 =cut
|
dongsheng@623
|
720
|
dongsheng@623
|
# shiftline()
#
# Remove and return the next entry of the input document. doc_in is a flat
# list alternating between a line and its reference, so two elements are
# consumed per call. Both values are undef once doc_in is exhausted.
sub shiftline {
    my $self = $_[0];

    my $line = shift @{ $self->{TT}{doc_in} };
    my $ref  = shift @{ $self->{TT}{doc_in} };

    return ($line, $ref);
}
|
dongsheng@623
|
# unshiftline($$)
#
# Put a line and its reference back at the front of the input document, so
# that the next shiftline() returns them again. Arguments are prepended in
# the order given; the new number of elements in doc_in is returned.
sub unshiftline {
    my ($self, @entries) = @_;
    return unshift @{ $self->{TT}{doc_in} }, @entries;
}
|
dongsheng@623
|
730
|
dongsheng@623
|
# pushline($)
#
# Append a line to the output document. Undefined values are silently
# ignored; an empty string is still pushed.
sub pushline {
    my ($self, $line) = @_;
    push @{ $self->{TT}{doc_out} }, $line if defined $line;
}
|
dongsheng@623
|
# popline()
#
# Remove and return the most recently pushed line of the output document
# (undef when doc_out is empty).
sub popline {
    my ($self) = @_;
    return pop @{ $self->{TT}{doc_out} };
}
|
dongsheng@623
|
733
|
dongsheng@623
|
734 =head2 Marking strings as translatable
|
dongsheng@623
|
735
|
dongsheng@623
|
736 One function is provided to handle the text which should be translated.
|
dongsheng@623
|
737
|
dongsheng@623
|
738 =over 4
|
dongsheng@623
|
739
|
dongsheng@623
|
740 =item translate($$$)
|
dongsheng@623
|
741
|
dongsheng@623
|
742 Mandatory arguments:
|
dongsheng@623
|
743
|
dongsheng@623
|
744 =over 2
|
dongsheng@623
|
745
|
dongsheng@623
|
746 =item -
|
dongsheng@623
|
747
|
dongsheng@623
|
748 A string to translate
|
dongsheng@623
|
749
|
dongsheng@623
|
750 =item -
|
dongsheng@623
|
751
|
dongsheng@623
|
752 The reference of this string (ie, position in inputfile)
|
dongsheng@623
|
753
|
dongsheng@623
|
754 =item -
|
dongsheng@623
|
755
|
dongsheng@623
|
756 The type of this string (ie, the textual description of its structural role
|
dongsheng@623
|
757 ; used in Locale::Po4a::Po::gettextization() ; see also L<po4a(7)|po4a.7>,
|
dongsheng@623
|
758 section I<Gettextization: how does it work?>)
|
dongsheng@623
|
759
|
dongsheng@623
|
760 =back
|
dongsheng@623
|
761
|
dongsheng@623
|
762 This function can also take some extra arguments. They must be organized as
|
dongsheng@623
|
763 a hash. For example:
|
dongsheng@623
|
764
|
dongsheng@623
|
765 $self->translate("string","ref","type",
|
dongsheng@623
|
766 'wrap' => 1);
|
dongsheng@623
|
767
|
dongsheng@623
|
768 =over
|
dongsheng@623
|
769
|
dongsheng@623
|
770 =item wrap
|
dongsheng@623
|
771
|
dongsheng@623
|
772 boolean indicating whether we can consider that whitespaces in string are
|
dongsheng@623
|
773 not important. If yes, the function canonizes the string before looking for
|
dongsheng@623
|
774 a translation or extracting it, and wraps the translation.
|
dongsheng@623
|
775
|
dongsheng@623
|
776 =item wrapcol
|
dongsheng@623
|
777
|
dongsheng@623
|
778 The column at which we should wrap (default: 76).
|
dongsheng@623
|
779
|
dongsheng@623
|
780 =item comment
|
dongsheng@623
|
781
|
dongsheng@623
|
782 An extra comment to add to the entry.
|
dongsheng@623
|
783
|
dongsheng@623
|
784 =back
|
dongsheng@623
|
785
|
dongsheng@623
|
786 Actions:
|
dongsheng@623
|
787
|
dongsheng@623
|
788 =over 2
|
dongsheng@623
|
789
|
dongsheng@623
|
790 =item -
|
dongsheng@623
|
791
|
dongsheng@623
|
792 Pushes the string, reference and type to po_out.
|
dongsheng@623
|
793
|
dongsheng@623
|
794 =item -
|
dongsheng@623
|
795
|
dongsheng@623
|
796 Returns the translation of the string (as found in po_in) so that the
|
dongsheng@623
|
797 parser can build the doc_out.
|
dongsheng@623
|
798
|
dongsheng@623
|
799 =item -
|
dongsheng@623
|
800
|
dongsheng@623
|
801 Handles the charsets to recode the strings before sending them to
|
dongsheng@623
|
802 po_out and before returning the translations.
|
dongsheng@623
|
803
|
dongsheng@623
|
804 =back
|
dongsheng@623
|
805
|
dongsheng@623
|
806 =back
|
dongsheng@623
|
807
|
dongsheng@623
|
808 =cut
|
dongsheng@623
|
809
|
dongsheng@623
|
# translate($string, $ref, $type, %options)
#
# Registers $string in the output PO (po_out) and returns its translation as
# found in the input PO (po_in).
#
#   $string  - text to translate; an undefined or empty string yields ""
#              without touching po_out
#   $ref     - reference of the string (position in the input file)
#   $type    - textual description of the structural role of the string
#   %options - 'wrap'    : boolean, passed on to gettext() and push()
#              'wrapcol' : wrapping column; a negative value is taken
#                          relative to column 76
#              'comment' : automatic comment attached to the PO entry
#
# Dies when the input is not pure ascii and no usable input charset is known.
sub translate {
    my $self=shift;
    my ($string,$ref,$type)=(shift,shift,shift);
    my (%options)=@_;

    # my $validoption="wrap wrapcol";
    # my %validoption;

    return "" unless defined($string) && length($string);

    # map { $validoption{$_}=1 } (split(/ /,$validoption));
    # foreach (keys %options) {
    #     Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption"
    #         unless $validoption{$_};
    # }

    # Work out the charset of the input document.
    my $in_charset;
    if ($self->{TT}{ascii_input}) {
        $in_charset = "ascii";
    } else {
        if (defined($self->{TT}{'file_in_charset'}) and
            length($self->{TT}{'file_in_charset'}) and
            $self->{TT}{'file_in_charset'} !~ m/ascii/i) {
            $in_charset=$self->{TT}{'file_in_charset'};
        } else {
            # FYI, the document charset have to be determined *before* we see the first
            # string to recode.
            die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ascii char at %s)"), $self->{TT}{non_ascii_ref})
        }
    }

    # Recode the string to the charset of the input PO before looking it up.
    if ($self->{TT}{po_in}->get_charset() ne "CHARSET") {
        $string = encode_from_to($string,
                                 $self->{TT}{'file_in_encoder'},
                                 $self->{TT}{po_in}{encoder});
    }

    if (defined $options{'wrapcol'} && $options{'wrapcol'} < 0) {
        # FIXME: should be the parameter given with --width
        $options{'wrapcol'} = 76 + $options{'wrapcol'};
    }
    my $transstring = $self->{TT}{po_in}->gettext($string,
                                                  'wrap' => $options{'wrap'}||0,
                                                  'wrapcol' => $options{'wrapcol'});

    # Recode the translation from the input PO charset to the output
    # document charset.
    if ($self->{TT}{po_in}->get_charset() ne "CHARSET") {
        my $out_encoder = $self->{TT}{'file_out_encoder'};
        unless (defined $out_encoder) {
            $out_encoder = find_encoding($self->get_out_charset)
        }
        $transstring = encode_from_to($transstring,
                                      $self->{TT}{po_in}{encoder},
                                      $out_encoder);
    }

    # If the input document isn't completely in ascii, we should see what to
    # do with the current string
    unless ($self->{TT}{ascii_input}) {
        my $out_charset = $self->{TT}{po_out}->get_charset();
        # We set the output po charset
        if ($out_charset eq "CHARSET") {
            if ($self->{TT}{utf_mode}) {
                $out_charset="utf-8";
            } else {
                $out_charset=$in_charset;
            }
            $self->{TT}{po_out}->set_charset($out_charset);
        }
        # Compare the charset names as plain strings, ignoring case. A
        # charset name is data, not a pattern: interpolating it into a regex
        # would let metacharacters in the name change the comparison.
        if ( lc($in_charset) ne lc($out_charset) ) {
            Encode::from_to($string,$in_charset,$out_charset);
            if (defined($options{'comment'}) and length($options{'comment'})) {
                Encode::from_to($options{'comment'},$in_charset,$out_charset);
            }
        }
    }

    # the comments provided by the modules are automatic comments from the PO point of view
    $self->{TT}{po_out}->push('msgid'     => $string,
                              'reference' => $ref,
                              'type'      => $type,
                              'automatic' => $options{'comment'},
                              'wrap'      => $options{'wrap'}||0,
                              'wrapcol'   => $options{'wrapcol'});

    # if ($self->{TT}{po_in}->get_charset ne "CHARSET") {
    #     Encode::from_to($transstring,$self->{TT}{po_in}->get_charset,
    #                     $self->get_out_charset);
    # }

    if ($options{'wrap'}||0) {
        # Strip the spaces left at the end of each line, but keep the
        # spaces that terminate the whole translation.
        $transstring =~ s/( *)$//s;
        my $trailing_spaces = $1||"";
        $transstring =~ s/ *$//gm;
        $transstring .= $trailing_spaces;
    }

    return $transstring;
}
|
dongsheng@623
|
908
|
dongsheng@623
|
909 =head2 Misc functions
|
dongsheng@623
|
910
|
dongsheng@623
|
911 =over 4
|
dongsheng@623
|
912
|
dongsheng@623
|
913 =item verbose()
|
dongsheng@623
|
914
|
dongsheng@623
|
915 Returns whether the verbose option was passed during the creation of the
|
dongsheng@623
|
916 TransTractor.
|
dongsheng@623
|
917
|
dongsheng@623
|
918 =cut
|
dongsheng@623
|
919
|
dongsheng@623
|
# verbose([$level])
#
# Combined accessor for the verbosity level. With a defined argument the
# level is stored (and returned); without one the stored level is returned.
sub verbose {
    my ($self, $level) = @_;

    # undef and 0 have the same meaning, but one generates warnings
    return $self->{TT}{verbose} || 0 unless defined $level;

    return $self->{TT}{verbose} = $level;
}
|
dongsheng@623
|
927
|
dongsheng@623
|
928 =item debug()
|
dongsheng@623
|
929
|
dongsheng@623
|
930 Returns whether the debug option was passed during the creation of the
|
dongsheng@623
|
931 TransTractor.
|
dongsheng@623
|
932
|
dongsheng@623
|
933 =cut
|
dongsheng@623
|
934
|
dongsheng@623
|
# debug()
#
# Return the stored debug setting as is (undef when it was never set).
sub debug {
    my ($self) = @_;
    return $self->{TT}{debug};
}
|
dongsheng@623
|
938
|
dongsheng@623
|
939 =item detected_charset($)
|
dongsheng@623
|
940
|
dongsheng@623
|
941 This tells TransTractor that a new charset (the first argument) has been
|
dongsheng@623
|
942 detected from the input document. It can usually be read from the document
|
dongsheng@623
|
943 header. Only the first charset will remain, coming either from the
|
dongsheng@623
|
944 process() arguments or detected from the document.
|
dongsheng@623
|
945
|
dongsheng@623
|
946 =cut
|
dongsheng@623
|
947
|
dongsheng@623
|
# detected_charset($charset)
#
# Record a charset detected in the input document. The first known charset
# wins: a non-empty file_in_charset is never overwritten. When a charset is
# stored, the matching Encode encoder is cached in file_in_encoder. Whenever
# the resulting input charset is not an ascii one, the ascii_input flag is
# cleared.
sub detected_charset {
    my ($self, $charset) = @_;

    my $known = $self->{TT}{'file_in_charset'};
    if (!defined($known) or !length($known)) {
        $self->{TT}{'file_in_charset'} = $charset;
        $self->{TT}{'file_in_encoder'} = find_encoding($charset)
            if defined $charset;
    }

    if (defined $self->{TT}{'file_in_charset'} and
        length $self->{TT}{'file_in_charset'} and
        $self->{TT}{'file_in_charset'} !~ m/ascii/i) {
        $self->{TT}{ascii_input} = 0;
    }
}
|
dongsheng@623
|
964
|
dongsheng@623
|
965 =item get_out_charset()
|
dongsheng@623
|
966
|
dongsheng@623
|
967 This function will return the charset that should be used in the output
|
dongsheng@623
|
968 document (usually useful to substitute the input document's detected charset
|
dongsheng@623
|
969 where it has been found).
|
dongsheng@623
|
970
|
dongsheng@623
|
971 It will use the output charset specified in the command line. If it wasn't
|
dongsheng@623
|
972 specified, it will use the input po's charset, and if the input po has the
|
dongsheng@623
|
973 default "CHARSET", it will return the input document's charset, so that no
|
dongsheng@623
|
974 encoding is performed.
|
dongsheng@623
|
975
|
dongsheng@623
|
976 =cut
|
dongsheng@623
|
977
|
dongsheng@623
|
# get_out_charset()
#
# Return the charset to use for the output document, in order of preference:
#   1. the explicitly requested file_out_charset;
#   2. "utf-8" when utf_mode is set and the input is pure ascii;
#   3. the charset of the input PO;
#   4. the input document charset when the PO still says "CHARSET";
#   5. "ascii" as a last resort.
sub get_out_charset {
    my $self = shift;

    # Use the value specified at the command line
    my $requested = $self->{TT}{'file_out_charset'};
    return $requested if defined($requested) and length($requested);

    return "utf-8" if $self->{TT}{utf_mode} && $self->{TT}{ascii_input};

    my $charset = $self->{TT}{po_in}->get_charset;
    if ($charset eq "CHARSET") {
        # The PO has no real charset yet: fall back on the input document
        my $in_charset = $self->{TT}{'file_in_charset'};
        $charset = $in_charset
            if defined($in_charset) and length($in_charset);
        $charset = "ascii" if $charset eq "CHARSET";
    }
    return $charset;
}
|
dongsheng@623
|
1001
|
dongsheng@623
|
1002 =item recode_skipped_text($)
|
dongsheng@623
|
1003
|
dongsheng@623
|
1004 This function returns the recoded text passed as argument, from the input
|
dongsheng@623
|
1005 document's charset to the output document's one. This isn't needed when
|
dongsheng@623
|
1006 translating a string (translate() recodes everything itself), but it is when
|
dongsheng@623
|
1007 you skip a string from the input document and you want the output document to
|
dongsheng@623
|
1008 be consistent with the global encoding.
|
dongsheng@623
|
1009
|
dongsheng@623
|
1010 =cut
|
dongsheng@623
|
1011
|
dongsheng@623
|
# recode_skipped_text($text)
#
# Return $text converted from the input document charset to the output
# document charset. Pure ascii input is returned untouched. Dies when the
# input is not ascii and the input charset is unknown.
sub recode_skipped_text {
    my ($self, $text) = @_;

    # Nothing to recode in a pure ascii document
    return $text if $self->{TT}{'ascii_input'};

    my $in_charset = $self->{TT}{'file_in_charset'};
    unless (defined($in_charset) and length($in_charset)) {
        die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ascii char at %s)"), $self->{TT}{non_ascii_ref})
    }

    $text = encode_from_to($text,
                           $self->{TT}{'file_in_encoder'},
                           find_encoding($self->get_out_charset));
    return $text;
}
|
dongsheng@623
|
1026
|
dongsheng@623
|
1027
|
dongsheng@623
|
# encode_from_to($text, $from, $to)
#
# Convert $text from one encoding to another and return the result.
# Unlike Encode::from_to, the encodings are not given by name but as
# encoder objects (as returned by Encode::find_encoding(<name>)), which
# saves repeated calls to find_encoding.
#
# An undefined "from" encoder means the text is already UTF-8 (or ascii)
# and the decoding step is skipped.
# An undefined "to" encoder means UTF-8 is wanted and the encoding step is
# skipped.
#
sub encode_from_to {
    my ($text, $from, $to) = @_;

    # for ascii and UTF-8, no conversion needed to get an utf-8 string
    $text = $from->decode($text, 0) if defined $from;

    # without target encoder the text is left as it is
    $text = $to->encode($text, 0) if defined $to;

    return $text;
}
|
dongsheng@623
|
1058
|
dongsheng@623
|
1059 =back
|
dongsheng@623
|
1060
|
dongsheng@623
|
1061 =head1 FUTURE DIRECTIONS
|
dongsheng@623
|
1062
|
dongsheng@623
|
1063 One shortcoming of the current TransTractor is that it can't handle
|
dongsheng@623
|
1064 translated documents containing all languages, like debconf templates, or
|
dongsheng@623
|
1065 .desktop files.
|
dongsheng@623
|
1066
|
dongsheng@623
|
1067 To address this problem, the only interface changes needed are:
|
dongsheng@623
|
1068
|
dongsheng@623
|
1069 =over 2
|
dongsheng@623
|
1070
|
dongsheng@623
|
1071 =item -
|
dongsheng@623
|
1072
|
dongsheng@623
|
1073 take a hash as po_in_name (a list per language)
|
dongsheng@623
|
1074
|
dongsheng@623
|
1075 =item -
|
dongsheng@623
|
1076
|
dongsheng@623
|
1077 add an argument to translate to indicate the target language
|
dongsheng@623
|
1078
|
dongsheng@623
|
1079 =item -
|
dongsheng@623
|
1080
|
dongsheng@623
|
1081 make a pushline_all function, which would make pushline of its content for
|
dongsheng@623
|
1082 all languages, using a map-like syntax:
|
dongsheng@623
|
1083
|
dongsheng@623
|
1084 $self->pushline_all({ "Description[".$langcode."]=".
|
dongsheng@623
|
1085 $self->translate($line,$ref,$langcode)
|
dongsheng@623
|
1086 });
|
dongsheng@623
|
1087
|
dongsheng@623
|
1088 =back
|
dongsheng@623
|
1089
|
dongsheng@623
|
1090 Will see if it's enough ;)
|
dongsheng@623
|
1091
|
dongsheng@623
|
1092 =head1 AUTHORS
|
dongsheng@623
|
1093
|
dongsheng@623
|
1094 Denis Barbier <barbier@linuxfr.org>
|
dongsheng@623
|
1095 Martin Quinson (mquinson#debian.org)
|
dongsheng@623
|
1096 Jordi Vilalta <jvprat@gmail.com>
|
dongsheng@623
|
1097
|
dongsheng@623
|
1098 =cut
|
dongsheng@623
|
1099
|
dongsheng@623
|
1100 1;
|