author | zecke <zecke> | 2003-04-20 17:24:50 (UTC) |
---|---|---|
committer | zecke <zecke> | 2003-04-20 17:24:50 (UTC) |
commit | 92c687d281f69085436a77efb8cd1d4d2d9333f8 (patch) (unidiff) | |
tree | 7bbfac3ea310ed140e78cb3c17f8219294cb61d0 /development/translation/opie-lupdate/numberh.cpp | |
parent | f1f4e6794507d9b8dafb46ce05968a0647a41777 (diff) | |
download | opie-92c687d281f69085436a77efb8cd1d4d2d9333f8.zip opie-92c687d281f69085436a77efb8cd1d4d2d9333f8.tar.gz opie-92c687d281f69085436a77efb8cd1d4d2d9333f8.tar.bz2 |
Initial revision
Diffstat (limited to 'development/translation/opie-lupdate/numberh.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r-- | development/translation/opie-lupdate/numberh.cpp | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/development/translation/opie-lupdate/numberh.cpp b/development/translation/opie-lupdate/numberh.cpp new file mode 100644 index 0000000..f7b7bf8 --- a/dev/null +++ b/development/translation/opie-lupdate/numberh.cpp | |||
@@ -0,0 +1,235 @@ | |||
1 | /********************************************************************** | ||
2 | ** Copyright (C) 2000-2002 Trolltech AS. All rights reserved. | ||
3 | ** | ||
4 | ** This file is part of Qt Linguist. | ||
5 | ** | ||
6 | ** This file may be distributed and/or modified under the terms of the | ||
7 | ** GNU General Public License version 2 as published by the Free Software | ||
8 | ** Foundation and appearing in the file LICENSE.GPL included in the | ||
9 | ** packaging of this file. | ||
10 | ** | ||
11 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE | ||
12 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||
13 | ** | ||
14 | ** See http://www.trolltech.com/gpl/ for GPL licensing information. | ||
15 | ** | ||
16 | ** Contact info@trolltech.com if any conditions of this licensing are | ||
17 | ** not clear to you. | ||
18 | ** | ||
19 | **********************************************************************/ | ||
20 | |||
21 | #include <metatranslator.h> | ||
22 | |||
23 | #include <qmemarray.h> | ||
24 | #include <qcstring.h> | ||
25 | #include <qmap.h> | ||
26 | #include <qstringlist.h> | ||
27 | |||
28 | #include <ctype.h> | ||
29 | |||
// Messages indexed by a QCString key; in this file the key is always the
// zeroed form of the message's source text (see zeroKey()).
typedef QMap<QCString, MetaTranslatorMessage> TMM;
// Plain list of messages, used here to hold the result of
// MetaTranslator::messages().
typedef QValueList<MetaTranslatorMessage> TML;
32 | |||
/*
  Returns TRUE if 'c' is a character that may sit between two groups of
  digits without ending the number (punctuation or white space, such as
  the '.' of "3.0"); otherwise returns FALSE.

  'c' is usually a plain char promoted to int and may therefore be
  negative for bytes above 127. The <ctype.h> classifiers are only
  defined for EOF and for values representable as unsigned char, so the
  value is converted first.
*/
static bool isDigitFriendly( int c )
{
    const int uc = static_cast<unsigned char>( c );
    return ispunct( uc ) || isspace( uc );
}

/*
  Returns the length of the number that starts at s[0], or 0 if 's' does
  not start with a digit.

  A number is a run of digits in which groups of digits may be separated
  by one or two digit-friendly characters, so "3.0", "1,000" and "1, 2"
  each count as one number. A separator is only consumed when a digit
  follows within the next two characters, so trailing punctuation or
  white space is never part of the number.
*/
static int numberLength( const char *s )
{
    // Classify bytes as unsigned char: passing a negative char to the
    // <ctype.h> functions is undefined behaviour.
    const unsigned char *u = reinterpret_cast<const unsigned char *>( s );
    int i = 0;

    if ( isdigit(u[0]) ) {
        do {
            i++;
        } while ( isdigit(u[i]) ||
                  (isDigitFriendly(u[i]) &&
                   (isdigit(u[i + 1]) ||
                    (isDigitFriendly(u[i + 1]) && isdigit(u[i + 2])))) );
    }
    return i;
}
52 | |||
53 | /* | ||
54 | Returns a version of 'key' where all numbers have been replaced by zeroes. If | ||
55 | there were none, returns "". | ||
56 | */ | ||
57 | static QCString zeroKey( const char *key ) | ||
58 | { | ||
59 | QCString zeroed( strlen(key) + 1 ); | ||
60 | char *z = zeroed.data(); | ||
61 | int i = 0, j = 0; | ||
62 | int len; | ||
63 | bool metSomething = FALSE; | ||
64 | |||
65 | while ( key[i] != '\0' ) { | ||
66 | len = numberLength( key + i ); | ||
67 | if ( len > 0 ) { | ||
68 | i += len; | ||
69 | z[j++] = '0'; | ||
70 | metSomething = TRUE; | ||
71 | } else { | ||
72 | z[j++] = key[i++]; | ||
73 | } | ||
74 | } | ||
75 | z[j] = '\0'; | ||
76 | |||
77 | if ( metSomething ) | ||
78 | return zeroed; | ||
79 | else | ||
80 | return ""; | ||
81 | } | ||
82 | |||
/*
  Builds a candidate translation for 'newSource' from the existing
  translation of a similar source text.

  'oldTranslation' is the existing translation of 'oldSource'. The
  numbers of 'oldSource' are paired, in order of appearance, with the
  numbers found at the corresponding positions of 'newSource'; each old
  number met in 'oldTranslation' is then replaced by its new
  counterpart. Doubtful results get " {...?}" markers appended.

  Both source texts are walked in lock step, so they are expected to
  differ only in their numbers; the caller in this file only pairs
  messages whose zeroKey() is the same.
*/
static QString translationAttempt( const QString& oldTranslation,
                                   const char *oldSource,
                                   const char *newSource )
{
    // p is the number of numbers in oldSource: zeroKey() turns each of
    // them into exactly one '0'
    int p = zeroKey( oldSource ).contains( '0' );
    int oldSourceLen = qstrlen( oldSource );
    QString attempt;
    QStringList oldNumbers;
    QStringList newNumbers;
    // met[k]: oldNumbers[k] has already been replaced in the translation
    QMemArray<bool> met( p );
    // matchedYet[k]: how many leading characters of oldNumbers[k] match
    // the translation text that ends at the current position
    QMemArray<int> matchedYet( p );
    int i, j;
    int k = 0, ell, best;
    int m, n;
    int pass;

    /*
      This algorithm is hard to follow, so we'll consider an example
      all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
      and newSource is "TeX 3.1".

      First, we set up two tables: oldNumbers and newNumbers. In our
      example, oldNumbers[0] is "3.0" and newNumbers[0] is "3.1".
    */
    for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
        m = numberLength( oldSource + i );
        n = numberLength( newSource + j );
        if ( m > 0 ) {
            // NOTE(review): the + 1 presumably accounts for the '\0' that
            // QCString( str, maxlen ) counts in maxlen -- confirm in Qt docs
            oldNumbers.append( QCString(oldSource + i, m + 1) );
            newNumbers.append( QCString(newSource + j, n + 1) );
            // skip the numbers; i and j may advance by different amounts
            i += m;
            j += n;
            met[k] = FALSE;
            matchedYet[k] = 0;
            k++;
        }
    }

    /*
      We now go over the old translation, "XeT 3.0", one letter at a
      time, looking for numbers found in oldNumbers. Whenever such a
      number is met, it is replaced with its newNumbers equivalent. In
      our example, the "3.0" of "XeT 3.0" becomes "3.1".
    */
    for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
        attempt += oldTranslation[i];
        // extend or reset the partial match of every old number
        for ( k = 0; k < p; k++ ) {
            if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
                matchedYet[k]++;
            else
                matchedYet[k] = 0;
        }

        /*
          Let's find out if the last character ended a match. We make
          two passes over the data. In the first pass, we try to
          match only numbers that weren't matched yet; if that fails,
          the second pass also accepts numbers that were already
          matched. This is useful in some suspicious cases, flagged
          below.
        */
        for ( pass = 0; pass < 2; pass++ ) {
            best = p; // an impossible value
            for ( k = 0; k < p; k++ ) {
                // A candidate must be fully matched, and the matched text
                // must be a complete number of the translation, not just
                // a part of a longer one.
                if ( (!met[k] || pass > 0) &&
                     matchedYet[k] == (int) oldNumbers[k].length() &&
                     numberLength(oldTranslation.latin1() + (i + 1) -
                                  matchedYet[k]) == matchedYet[k] ) {
                    // the longer the better
                    if ( best == p || matchedYet[k] > matchedYet[best] )
                        best = k;
                }
            }
            if ( best != p ) {
                // drop the old number just copied and append the new one
                attempt.truncate( attempt.length() - matchedYet[best] );
                attempt += newNumbers[best];
                met[best] = TRUE;
                for ( k = 0; k < p; k++ )
                    matchedYet[k] = 0;
                break;
            }
        }
    }

    /*
      We flag two kinds of suspicious cases. They are identified as
      such with comments such as "{2000?}" at the end.

      First kind: a number of the old source text never showed up in
      the old translation. The corresponding new number is appended as
      a hint. Example: old source text "TeX 3.0" translated as
      "XeT 2.0", with new source text "TeX 3.1", gives
      "XeT 2.0 {3.1?}".
    */
    for ( k = 0; k < p; k++ ) {
        if ( !met[k] )
            attempt += QString( " {" ) + newNumbers[k] + QString( "?}" );
    }

    /*
      Example of the second kind: "1 of 1" translated as "1 af 1",
      with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
      because it's not clear which of "1 af 2" and "2 af 1" is right.
      The '<' comparison reports each such pair only once.
    */
    for ( k = 0; k < p; k++ ) {
        for ( ell = 0; ell < p; ell++ ) {
            if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
                 newNumbers[k] < newNumbers[ell] )
                attempt += QString( " {" ) + newNumbers[k] + QString( " or " ) +
                           newNumbers[ell] + QString( "?}" );
        }
    }
    return attempt;
}
194 | |||
195 | /* | ||
196 | Augments a MetaTranslator with translations easily derived from | ||
197 | similar existing (probably obsolete) translations. | ||
198 | |||
199 | For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1" | ||
200 | has no translation, "XeT 3.1" is added to the translator and is | ||
201 | marked Unfinished. | ||
202 | */ | ||
203 | void applyNumberHeuristic( MetaTranslator *tor, bool verbose ) | ||
204 | { | ||
205 | TMM translated, untranslated; | ||
206 | TMM::Iterator t, u; | ||
207 | TML all = tor->messages(); | ||
208 | TML::Iterator it; | ||
209 | int inserted = 0; | ||
210 | |||
211 | for ( it = all.begin(); it != all.end(); ++it ) { | ||
212 | if ( (*it).type() == MetaTranslatorMessage::Unfinished ) { | ||
213 | if ( (*it).translation().isEmpty() ) | ||
214 | untranslated.insert( zeroKey((*it).sourceText()), *it ); | ||
215 | } else if ( !(*it).translation().isEmpty() ) { | ||
216 | translated.insert( zeroKey((*it).sourceText()), *it ); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | for ( u = untranslated.begin(); u != untranslated.end(); ++u ) { | ||
221 | t = translated.find( u.key() ); | ||
222 | if ( t != translated.end() && !t.key().isEmpty() && | ||
223 | qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) { | ||
224 | MetaTranslatorMessage m( *u ); | ||
225 | m.setTranslation( translationAttempt((*t).translation(), | ||
226 | (*t).sourceText(), | ||
227 | (*u).sourceText()) ); | ||
228 | tor->insert( m ); | ||
229 | inserted++; | ||
230 | } | ||
231 | } | ||
232 | if ( verbose && inserted != 0 ) | ||
233 | fprintf( stderr, " number heuristic provided %d translation%s\n", | ||
234 | inserted, inserted == 1 ? "" : "s" ); | ||
235 | } | ||