hgbook: 082bb76417f1 tools/po4a/lib/Locale/Po4a/TransTractor.pm

837 die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ascii char at %s)"), $self->{TT}{non_ascii_ref})

838 }

839 }

840

841 if ($self->{TT}{po_in}->get_charset ne "CHARSET") {

842 $string = encode_from_to($string,

843 $self->{TT}{'file_in_encoder'},

844 $self->{TT}{po_in}{encoder});

845 }

846

847 if (defined $options{'wrapcol'} && $options{'wrapcol'} < 0) {

848 # FIXME: should be the parameter given with --width

849 $options{'wrapcol'} = 76 + $options{'wrapcol'};

850 }

851 my $transstring = $self->{TT}{po_in}->gettext($string,

852 'wrap' => $options{'wrap'}||0,

853 'wrapcol' => $options{'wrapcol'});

854

855 if ($self->{TT}{po_in}->get_charset ne "CHARSET") {

856 my $out_encoder = $self->{TT}{'file_out_encoder'};

857 unless (defined $out_encoder) {

858 $out_encoder = find_encoding($self->get_out_charset)

859 }

860 $transstring = encode_from_to($transstring,

861 $self->{TT}{po_in}{encoder},

862 $out_encoder);

863 }

864

865 # If the input document isn't completely in ascii, we should see what to

866 # do with the current string

867 unless ($self->{TT}{ascii_input}) {

868 my $out_charset = $self->{TT}{po_out}->get_charset;

869 # We set the output po charset

870 if ($out_charset eq "CHARSET") {

871 if ($self->{TT}{utf_mode}) {

872 $out_charset="utf-8";

873 } else {

874 $out_charset=$in_charset;

875 }

876 $self->{TT}{po_out}->set_charset($out_charset);

877 }

878 if ( $in_charset !~ /^$out_charset$/i ) {

879 Encode::from_to($string,$in_charset,$out_charset);

880 if (defined($options{'comment'}) and length($options{'comment'})) {

881 Encode::from_to($options{'comment'},$in_charset,$out_charset);

882 }

883 }

884 }

885

886 # the comments provided by the modules are automatic comments from the PO point of view

887 $self->{TT}{po_out}->push('msgid' => $string,

888 'reference' => $ref,

889 'type' => $type,

890 'automatic' => $options{'comment'},

891 'wrap' => $options{'wrap'}||0,

892 'wrapcol' => $options{'wrapcol'});

893

894 # if ($self->{TT}{po_in}->get_charset ne "CHARSET") {

895 # Encode::from_to($transstring,$self->{TT}{po_in}->get_charset,

896 # $self->get_out_charset);

897 # }

898

899 if ($options{'wrap'}||0) {

900 $transstring =~ s/( *)$//s;

901 my $trailing_spaces = $1||"";

902 $transstring =~ s/ *$//gm;

903 $transstring .= $trailing_spaces;

904 }

905

906 return $transstring;

907 }

908

909 =head2 Misc functions

910

911 =over 4

912

913 =item verbose()

914

915 Returns whether the verbose option was passed during the creation of the

916 TransTractor. If a defined argument is given, it is stored as the new verbose value instead.

917

918 =cut

919

# Accessor for the verbosity setting kept in $self->{TT}{verbose}.
#
# With a defined argument, stores it as the new verbosity and returns the
# stored value. Without one, returns the current verbosity; undef (or any
# other false value) is reported as 0, since both mean "not verbose" but
# undef would generate warnings in the callers.
sub verbose {
    my ($self, $level) = @_;

    # Getter: no (defined) new value was supplied.
    return $self->{TT}{verbose} || 0 unless defined $level;

    # Setter: remember the new value and hand it back.
    return $self->{TT}{verbose} = $level;
}

927

928 =item debug()

929

930 Returns whether the debug option was passed during the creation of the

931 TransTractor.

932

933 =cut

934

# Returns the debug setting kept in $self->{TT}{debug}, exactly as stored
# (so undef when nothing was stored there).
sub debug {
    my ($self) = @_;
    return $self->{TT}{debug};
}

938

939 =item detected_charset($)

940

941 This tells TransTractor that a new charset (the first argument) has been

942 detected from the input document. It can usually be read from the document

943 header. Only the first charset will remain, coming either from the

944 process() arguments or detected from the document.

945

946 =cut

947

# Records $charset as the input document's charset unless a non-empty one is
# already known, then clears the ascii_input flag when the retained charset
# name does not contain "ascii" (case-insensitively).
#
# Arguments: the charset name detected in the document (may be undef).
sub detected_charset {
    my $self    = shift;
    my $charset = shift;

    # Only the first charset wins: keep any non-empty value already recorded.
    my $known = $self->{TT}{'file_in_charset'};
    if (!defined($known) or !length($known)) {
        $self->{TT}{'file_in_charset'} = $charset;
        # Keep the matching encoder object alongside the charset name.
        $self->{TT}{'file_in_encoder'} = find_encoding($charset)
            if defined $charset;
    }

    # Re-read the retained value: it may be the old one or the new one.
    my $current = $self->{TT}{'file_in_charset'};
    if (defined($current) and length($current) and $current !~ m/ascii/i) {
        $self->{TT}{ascii_input} = 0;
    }
}

964

965 =item get_out_charset()

966

967 This function will return the charset that should be used in the output

968 document (usually useful to substitute the input document's detected charset

969 where it has been found).

970

971 It will use the output charset specified in the command line. If it wasn't

972 specified, it will use the input po's charset, and if the input po has the

973 default "CHARSET", it will return the input document's charset, so that no

974 encoding is performed.

975

976 =cut

977

# Returns the name of the charset to use for the output document.
#
# Order of precedence, as implemented below:
#   1. a non-empty $self->{TT}{file_out_charset};
#   2. "utf-8" when both utf_mode and ascii_input are set;
#   3. the charset reported by the input PO ($self->{TT}{po_in});
#   4. if that is the placeholder "CHARSET", a non-empty file_in_charset;
#   5. "ascii" if the result is still the placeholder "CHARSET".
sub get_out_charset {
    my ($self) = @_;

    # An explicitly requested output charset always wins.
    my $requested = $self->{TT}{'file_out_charset'};
    return $requested if defined($requested) and length($requested);

    return "utf-8" if $self->{TT}{utf_mode} && $self->{TT}{ascii_input};

    my $charset = $self->{TT}{po_in}->get_charset;

    # The PO file still carries the template placeholder: fall back to the
    # input document's charset when there is one.
    if ($charset eq "CHARSET") {
        my $input = $self->{TT}{'file_in_charset'};
        $charset = $input if defined($input) and length($input);
    }

    # Checked separately on purpose: the fallback above may not have applied,
    # or may itself have produced "CHARSET".
    $charset = "ascii" if $charset eq "CHARSET";

    return $charset;
}

1001

1002 =item recode_skipped_text($)

1003

1004 This function returns the recoded text passed as argument, from the input

1005 document's charset to the output document's one. This isn't needed when

1006 translating a string (translate() recodes everything itself), but it is when

1007 you skip a string from the input document and you want the output document to

1008 be consistent with the global encoding.

1009

1010 =cut

1011

# Returns $text converted from the input document's charset to the output
# document's charset (as given by get_out_charset()).
#
# When the ascii_input flag is set the text is returned untouched. Otherwise
# a non-empty input charset must be known; if it is not, the function dies
# with a message asking for the charset to be given on the command line.
sub recode_skipped_text {
    my $self = shift;
    my $text = shift;

    # Nothing to convert while the ascii_input flag is still set.
    return $text if $self->{TT}{'ascii_input'};

    my $in_charset = $self->{TT}{'file_in_charset'};
    if (!defined($in_charset) or !length($in_charset)) {
        die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ascii char at %s)"), $self->{TT}{non_ascii_ref})
    }

    # encode_from_to() takes encoder objects rather than charset names.
    $text = encode_from_to($text,
                           $self->{TT}{'file_in_encoder'},
                           find_encoding($self->get_out_charset));
    return $text;
}

1026

1027

1028 # encode_from_to($,$,$)

1029 #

1030 # Encode the given text from one encoding to another one.

1031 # It differs from Encode::from_to because it does not take the name of the

1032 # encoding in argument, but the encoders (as returned by the

1033 # Encode::find_encoding(<name>) method). Thus it permits to save a bunch