summary | refs | log | tree | commit | diff
path: root/development/translation/opie-lupdate/numberh.cpp
Unidiff
Diffstat (limited to 'development/translation/opie-lupdate/numberh.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r-- development/translation/opie-lupdate/numberh.cpp 235
1 files changed, 235 insertions, 0 deletions
diff --git a/development/translation/opie-lupdate/numberh.cpp b/development/translation/opie-lupdate/numberh.cpp
new file mode 100644
index 0000000..f7b7bf8
--- a/dev/null
+++ b/development/translation/opie-lupdate/numberh.cpp
@@ -0,0 +1,235 @@
1/**********************************************************************
2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
3**
4** This file is part of Qt Linguist.
5**
6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file.
10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15**
16** Contact info@trolltech.com if any conditions of this licensing are
17** not clear to you.
18**
19**********************************************************************/
20
21#include <metatranslator.h>
22
23#include <qmemarray.h>
24#include <qcstring.h>
25#include <qmap.h>
26#include <qstringlist.h>
27
28#include <ctype.h>
29
30typedef QMap<QCString, MetaTranslatorMessage> TMM;
31typedef QValueList<MetaTranslatorMessage> TML;
32
/*
  Returns true if 'c' is punctuation or white space, i.e. a character
  that numberLength() tolerates inside a number ("3.0", "1,000").

  'c' is narrowed to unsigned char before it reaches the <ctype.h>
  classifiers. Callers pass plain chars, which are negative for bytes
  above 0x7f on platforms where char is signed, and ispunct()/isspace()
  have undefined behaviour for negative arguments other than EOF.
*/
static bool isDigitFriendly( int c )
{
    const int uc = (unsigned char) c;
    return ispunct( uc ) || isspace( uc );
}
37
38static int numberLength( const char *s )
39{
40 int i = 0;
41
42 if ( isdigit(s[0]) ) {
43 do {
44 i++;
45 } while ( isdigit(s[i]) ||
46 (isDigitFriendly(s[i]) &&
47 (isdigit(s[i + 1]) ||
48 (isDigitFriendly(s[i + 1]) && isdigit(s[i + 2])))) );
49 }
50 return i;
51}
52
53/*
54 Returns a version of 'key' where all numbers have been replaced by zeroes. If
55 there were none, returns "".
56*/
57static QCString zeroKey( const char *key )
58{
59 QCString zeroed( strlen(key) + 1 );
60 char *z = zeroed.data();
61 int i = 0, j = 0;
62 int len;
63 bool metSomething = FALSE;
64
65 while ( key[i] != '\0' ) {
66 len = numberLength( key + i );
67 if ( len > 0 ) {
68 i += len;
69 z[j++] = '0';
70 metSomething = TRUE;
71 } else {
72 z[j++] = key[i++];
73 }
74 }
75 z[j] = '\0';
76
77 if ( metSomething )
78 return zeroed;
79 else
80 return "";
81}
82
83static QString translationAttempt( const QString& oldTranslation,
84 const char *oldSource,
85 const char *newSource )
86{
87 int p = zeroKey( oldSource ).contains( '0' );
88 int oldSourceLen = qstrlen( oldSource );
89 QString attempt;
90 QStringList oldNumbers;
91 QStringList newNumbers;
92 QMemArray<bool> met( p );
93 QMemArray<int> matchedYet( p );
94 int i, j;
95 int k = 0, ell, best;
96 int m, n;
97 int pass;
98
99 /*
100 This algorithm is hard to follow, so we'll consider an example
101 all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
102 and newSource is "XeT 3.1".
103
104 First, we set up two tables: oldNumbers and newNumbers. In our
105 example, oldNumber[0] is "3.0" and newNumber[0] is "3.1".
106 */
107 for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
108 m = numberLength( oldSource + i );
109 n = numberLength( newSource + j );
110 if ( m > 0 ) {
111 oldNumbers.append( QCString(oldSource + i, m + 1) );
112 newNumbers.append( QCString(newSource + j, n + 1) );
113 i += m;
114 j += n;
115 met[k] = FALSE;
116 matchedYet[k] = 0;
117 k++;
118 }
119 }
120
121 /*
122 We now go over the old translation, "XeT 3.0", one letter at a
123 time, looking for numbers found in oldNumbers. Whenever such a
124 number is met, it is replaced with its newNumber equivalent. In
125 our example, the "3.0" of "XeT 3.0" becomes "3.1".
126 */
127 for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
128 attempt += oldTranslation[i];
129 for ( k = 0; k < p; k++ ) {
130 if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
131 matchedYet[k]++;
132 else
133 matchedYet[k] = 0;
134 }
135
136 /*
137 Let's find out if the last character ended a match. We make
138 two passes over the data. In the first pass, we try to
139 match only numbers that weren't matched yet; if that fails,
140 the second pass does the trick. This is useful in some
141 suspicious cases, flagged below.
142 */
143 for ( pass = 0; pass < 2; pass++ ) {
144 best = p; // an impossible value
145 for ( k = 0; k < p; k++ ) {
146 if ( (!met[k] || pass > 0) &&
147 matchedYet[k] == (int) oldNumbers[k].length() &&
148 numberLength(oldTranslation.latin1() + (i + 1) -
149 matchedYet[k]) == matchedYet[k] ) {
150 // the longer the better
151 if ( best == p || matchedYet[k] > matchedYet[best] )
152 best = k;
153 }
154 }
155 if ( best != p ) {
156 attempt.truncate( attempt.length() - matchedYet[best] );
157 attempt += newNumbers[best];
158 met[best] = TRUE;
159 for ( k = 0; k < p; k++ )
160 matchedYet[k] = 0;
161 break;
162 }
163 }
164 }
165
166 /*
167 We flag two kinds of suspicious cases. They are identified as
168 such with comments such as "{2000?}" at the end.
169
170 Example of the first kind: old source text "TeX 3.0" translated
171 as "XeT 2.0" is flagged "TeX 2.0 {3.0?}", no matter what the
172 new text is.
173 */
174 for ( k = 0; k < p; k++ ) {
175 if ( !met[k] )
176 attempt += QString( " {" ) + newNumbers[k] + QString( "?}" );
177 }
178
179 /*
180 Example of the second kind: "1 of 1" translated as "1 af 1",
181 with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
182 because it's not clear which of "1 af 2" and "2 af 1" is right.
183 */
184 for ( k = 0; k < p; k++ ) {
185 for ( ell = 0; ell < p; ell++ ) {
186 if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
187 newNumbers[k] < newNumbers[ell] )
188 attempt += QString( " {" ) + newNumbers[k] + QString( " or " ) +
189 newNumbers[ell] + QString( "?}" );
190 }
191 }
192 return attempt;
193}
194
195/*
196 Augments a MetaTranslator with translations easily derived from
197 similar existing (probably obsolete) translations.
198
199 For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
200 has no translation, "XeT 3.1" is added to the translator and is
201 marked Unfinished.
202*/
203void applyNumberHeuristic( MetaTranslator *tor, bool verbose )
204{
205 TMM translated, untranslated;
206 TMM::Iterator t, u;
207 TML all = tor->messages();
208 TML::Iterator it;
209 int inserted = 0;
210
211 for ( it = all.begin(); it != all.end(); ++it ) {
212 if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
213 if ( (*it).translation().isEmpty() )
214 untranslated.insert( zeroKey((*it).sourceText()), *it );
215 } else if ( !(*it).translation().isEmpty() ) {
216 translated.insert( zeroKey((*it).sourceText()), *it );
217 }
218 }
219
220 for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
221 t = translated.find( u.key() );
222 if ( t != translated.end() && !t.key().isEmpty() &&
223 qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
224 MetaTranslatorMessage m( *u );
225 m.setTranslation( translationAttempt((*t).translation(),
226 (*t).sourceText(),
227 (*u).sourceText()) );
228 tor->insert( m );
229 inserted++;
230 }
231 }
232 if ( verbose && inserted != 0 )
233 fprintf( stderr, " number heuristic provided %d translation%s\n",
234 inserted, inserted == 1 ? "" : "s" );
235}