build: better not use a local var, it breaks various shells
[project/luci.git] / build / i18n-scan.pl
1 #!/usr/bin/perl
2
3 use strict;
4 use warnings;
5 use Text::Balanced qw(extract_bracketed extract_delimited extract_tagged);
6
# At least one directory to scan must be given on the command line.
@ARGV >= 1 || die "Usage: $0 <source directory>\n";


# Maps every extracted message string to the number of times it was seen.
my %stringtable;
11
# Decode the body of a Lua string literal (delimiters already removed) into
# the text it denotes, normalised for use as a message id.
#
#   $s       raw literal body as it appears in the source file
#   returns  the decoded string
#
# Processing order:
#   1. every run of whitespace (line breaks included) collapses to one space
#   2. backslash escapes are resolved: \n -> newline, \t -> tab, any other
#      escaped character stands for itself
#   3. one leading and one trailing space are removed
sub dec_lua_str
{
        my $s = shift;

        $s =~ s/[\s\n]+/ /g;

        # Resolve all escapes in a single left-to-right pass. Decoding "\n"
        # and "\t" in separate passes would mistake the tail of an escaped
        # backslash (as in "\\n") for the start of a newline escape.
        $s =~ s/\\(.)/ $1 eq 'n' ? "\n" : $1 eq 't' ? "\t" : $1 /ge;

        $s =~ s/^ //;
        $s =~ s/ $//;
        return $s;
}
23
# Normalise the text taken from inside a template translation tag.
#
#   argument  raw tag content
#   returns   the cleaned-up string
#
# A trailing dash is removed (presumably left over from a "-%>" closing
# tag -- confirm against the template syntax), every run of whitespace,
# line breaks included, is squeezed into a single space, and one leading
# and one trailing space are dropped.
sub dec_tpl_str
{
        my ($tpl) = @_;

        # Topicalise so that each substitution edits $tpl in place.
        for ($tpl)
        {
                s/-$//;
                s/[\s\n]+/ /g;
                s/^ //;
                s/ $//;
        }

        return $tpl;
}
33
34
# Scan every *.htm and *.lua file below the directories named on the command
# line and count each translatable string that is found in %stringtable.
#
# find(1) is started without a shell (list-form pipe open), so directory
# names containing spaces or shell metacharacters reach it unmodified.
my @find_cmd = ( 'find', @ARGV, '-type', 'f', '(', '-name', '*.htm', '-o', '-name', '*.lua', ')' );

if( open my $find, '-|', @find_cmd )
{
        while( defined( my $file = readline $find ) )
        {
                chomp $file;

                # Three-argument open: the file name is never parsed for a mode.
                my $src;
                unless( open $src, '<', $file )
                {
                        warn "$0: cannot read $file: $!\n";
                        next;
                }

                # Slurp the whole file. $/ is restored as soon as the do-block
                # ends, so the line-wise read of find's output is unaffected.
                my $raw = do { local $/; <$src> };
                close $src;

                next unless defined $raw;


                # Pass 1: function-style calls such as translate("..."),
                # translatef(...), i18n(...) and _(...). Each iteration cuts
                # $text down to the opening parenthesis of the next call.
                my $text = $raw;

                while( $text =~ s/ ^ .*? (?:translate|translatef|i18n|_) [\n\s]* \( /(/sgx )
                {
                        ( my $code, $text ) = extract_bracketed($text, q{('")});

                        # Unbalanced brackets: $text comes back unchanged, so
                        # just move on to the next candidate call.
                        next unless defined $code;

                        $code =~ s/^\([\n\s]*//;
                        $code =~ s/[\n\s]*\)$//;

                        my $res = "";

                        if( $code =~ /^['"]/ )
                        {
                                # One or more quoted literals, optionally joined
                                # by the ".." operator; concatenate their bodies.
                                while( 1 )
                                {
                                        ( my $sub, $code ) = extract_delimited($code, q{'"}, q{\s*(?:\.\.\s*)?});

                                        last unless defined $sub;

                                        $res .= substr $sub, 1, length($sub) - 2;
                                }
                        }
                        elsif( $code =~ /^(\[=*\[)/ )
                        {
                                # Long-bracket literal, e.g. [[...]] or [==[...]==].
                                # The closing tag is the opening tag with every
                                # "[" turned into "]".
                                my $stag = quotemeta $1;
                                my $etag = $stag;
                                   $etag =~ s/\[/]/g;

                                ( $res ) = extract_tagged($code, $stag, $etag);

                                # No matching closing tag: treat as "nothing found".
                                $res = "" unless defined $res;

                                $res =~ s/^$stag//;
                                $res =~ s/$etag$//;
                        }

                        $res = dec_lua_str($res);

                        # length() rather than truth, so the literal "0" is kept.
                        $stringtable{$res}++ if length $res;
                }


                # Pass 2: template tags that start with <%: or <%_ (optionally
                # <%-: / <%-_). The marker is stripped so that the remaining
                # text starts with a plain "<%" for extract_tagged.
                $text = $raw;

                while( $text =~ s/ ^ .*? <% -? [:_] /<%/sgx )
                {
                        ( my $code, $text ) = extract_tagged($text, '<%', '%>');

                        next unless defined $code;

                        # Drop the two-character "<%" and "%>" delimiters.
                        $code = dec_tpl_str(substr $code, 2, length($code) - 4);
                        $stringtable{$code}++;
                }
        }

        close $find;
}
else
{
        warn "$0: cannot run find: $!\n";
}
106
107
# Write the collected strings as a PO template to msgcat(1), which prints
# the normalised catalogue on standard output.
if( open my $po, '|-', 'msgcat', '-' )
{
        # Header entry declaring the character set of the catalogue.
        print {$po} "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n\n";

        foreach my $key ( sort keys %stringtable )
        {
                # The empty msgid is reserved for the header entry above.
                next unless length $key;

                # Escape for a double-quoted PO string. Backslashes must be
                # handled first so that the escapes added afterwards are not
                # escaped a second time. Raw newlines and tabs (produced when
                # Lua escapes were decoded) are not allowed inside the quotes.
                my $msgid = $key;
                $msgid =~ s/\\/\\\\/g;
                $msgid =~ s/"/\\"/g;
                $msgid =~ s/\n/\\n/g;
                $msgid =~ s/\t/\\t/g;

                printf {$po} "msgid \"%s\"\nmsgstr \"\"\n\n", $msgid;
        }

        # close() on a pipe also reports a non-zero exit status of the child.
        close $po
                or warn "$0: msgcat failed (exit status " . ( $? >> 8 ) . ")\n";
}
else
{
        warn "$0: cannot run msgcat: $!\n";
}