author | zecke <zecke> | 2002-09-10 12:09:49 (UTC) |
---|---|---|
committer | zecke <zecke> | 2002-09-10 12:09:49 (UTC) |
commit | 6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4 (patch) (unidiff) | |
tree | 6ebc93c6432f4ed9d00ef1448b6a047ef522a79a /library/qdawg.cpp | |
parent | d10cddb3c9ce75bc90b14add14bc133737fe35aa (diff) | |
download | opie-6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4.zip opie-6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4.tar.gz opie-6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4.tar.bz2 |
Qtopia1-6 merge
still to test
bic changes to be resolved
more changes to be made?
-rw-r--r-- | library/qdawg.cpp | 123 |
1 files changed, 120 insertions, 3 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp index 3615693..af5dc82 100644 --- a/library/qdawg.cpp +++ b/library/qdawg.cpp | |||
@@ -1,510 +1,627 @@ | |||
1 | /********************************************************************** | 1 | /********************************************************************** |
2 | ** Copyright (C) 2000 Trolltech AS. All rights reserved. | 2 | ** Copyright (C) 2000-2002 Trolltech AS. All rights reserved. |
3 | ** | 3 | ** |
4 | ** This file is part of Qtopia Environment. | 4 | ** This file is part of the Qtopia Environment. |
5 | ** | 5 | ** |
6 | ** This file may be distributed and/or modified under the terms of the | 6 | ** This file may be distributed and/or modified under the terms of the |
7 | ** GNU General Public License version 2 as published by the Free Software | 7 | ** GNU General Public License version 2 as published by the Free Software |
8 | ** Foundation and appearing in the file LICENSE.GPL included in the | 8 | ** Foundation and appearing in the file LICENSE.GPL included in the |
9 | ** packaging of this file. | 9 | ** packaging of this file. |
10 | ** | 10 | ** |
11 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE | 11 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE |
12 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | 12 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
13 | ** | 13 | ** |
14 | ** See http://www.trolltech.com/gpl/ for GPL licensing information. | 14 | ** See http://www.trolltech.com/gpl/ for GPL licensing information. |
15 | ** | 15 | ** |
16 | ** Contact info@trolltech.com if any conditions of this licensing are | 16 | ** Contact info@trolltech.com if any conditions of this licensing are |
17 | ** not clear to you. | 17 | ** not clear to you. |
18 | ** | 18 | ** |
19 | **********************************************************************/ | 19 | **********************************************************************/ |
20 | #include "qdawg.h" | 20 | #include "qdawg.h" |
21 | #include <qintdict.h> | 21 | #include <qintdict.h> |
22 | #include <qvaluelist.h> | 22 | #include <qvaluelist.h> |
23 | #include <qtextstream.h> | 23 | #include <qtextstream.h> |
24 | #include <qfile.h> | 24 | #include <qfile.h> |
25 | #include <qtl.h> | 25 | #include <qtl.h> |
26 | 26 | ||
27 | #include <limits.h> | 27 | #include <limits.h> |
28 | #include <stdio.h> | 28 | #include <stdio.h> |
29 | 29 | ||
30 | // for mmap | 30 | // for mmap |
31 | #include <sys/types.h> | 31 | #include <sys/types.h> |
32 | #include <sys/stat.h> | 32 | #include <sys/stat.h> |
33 | #include <sys/mman.h> | 33 | #include <sys/mman.h> |
34 | #include <fcntl.h> | 34 | #include <fcntl.h> |
35 | #include <errno.h> | 35 | #include <errno.h> |
36 | #include <unistd.h> | 36 | #include <unistd.h> |
37 | 37 | ||
38 | class QDawgPrivate; | 38 | class QDawgPrivate; |
39 | class QTrie; | 39 | class QTrie; |
40 | 40 | ||
41 | typedef QValueList<QTrie*> TrieClub; | 41 | typedef QValueList<QTrie*> TrieClub; |
42 | typedef QIntDict<TrieClub> TrieClubDirectory; | 42 | typedef QIntDict<TrieClub> TrieClubDirectory; |
43 | 43 | ||
44 | class TriePtr { | 44 | class TriePtr { |
45 | public: | 45 | public: |
46 | QChar letter; | 46 | QChar letter; |
47 | QTrie* p; | 47 | QTrie* p; |
48 | int operator <(const TriePtr& o) const; | 48 | int operator <(const TriePtr& o) const; |
49 | int operator >(const TriePtr& o) const; | 49 | int operator >(const TriePtr& o) const; |
50 | int operator <=(const TriePtr& o) const; | 50 | int operator <=(const TriePtr& o) const; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | class TrieList : public QValueList<TriePtr> { | 53 | class TrieList : public QValueList<TriePtr> { |
54 | bool sorted; | 54 | bool sorted; |
55 | public: | 55 | public: |
56 | TrieList() | 56 | TrieList() |
57 | { | 57 | { |
58 | sorted=TRUE; | 58 | sorted=TRUE; |
59 | } | 59 | } |
60 | 60 | ||
61 | QTrie* findAdd(QChar c); | 61 | QTrie* findAdd(QChar c); |
62 | bool equal(TrieList& l); | 62 | bool equal(TrieList& l); |
63 | 63 | ||
64 | void sort() | 64 | void sort() |
65 | { | 65 | { |
66 | if ( !sorted ) { | 66 | if ( !sorted ) { |
67 | qHeapSort(*this); | 67 | qHeapSort(*this); |
68 | sorted = TRUE; | 68 | sorted = TRUE; |
69 | } | 69 | } |
70 | } | 70 | } |
71 | }; | 71 | }; |
72 | 72 | ||
73 | // A fast but memory-wasting temporary class. The Dawg is the goal. | 73 | // A fast but memory-wasting temporary class. The Dawg is the goal. |
74 | class QTrie { | 74 | class QTrie { |
75 | public: | 75 | public: |
76 | QTrie(); | 76 | QTrie(); |
77 | ~QTrie(); | 77 | ~QTrie(); |
78 | 78 | ||
79 | void insertWord(const QString& s, uint index=0); | 79 | void insertWord(const QString& s, uint index=0); |
80 | bool equal(QTrie* o); | 80 | bool equal(QTrie* o); |
81 | void dump(int indent=0); | 81 | void dump(int indent=0); |
82 | 82 | ||
83 | private: | 83 | private: |
84 | TrieList children; | 84 | TrieList children; |
85 | bool isword; | 85 | bool isword; |
86 | 86 | ||
87 | friend class QDawgPrivate; | 87 | friend class QDawgPrivate; |
88 | int maxdepth; | 88 | int maxdepth; |
89 | int decendants; | 89 | int decendants; |
90 | int key; | 90 | int key; |
91 | void distributeKeys(TrieClubDirectory& directory); | 91 | void distributeKeys(TrieClubDirectory& directory); |
92 | QTrie* clubLeader(TrieClubDirectory& directory); | 92 | QTrie* clubLeader(TrieClubDirectory& directory); |
93 | int collectKeys(); | 93 | int collectKeys(); |
94 | friend class TriePtr; | 94 | friend class TriePtr; |
95 | friend class TrieList; | 95 | friend class TrieList; |
96 | }; | 96 | }; |
97 | 97 | ||
98 | QTrie::QTrie() | 98 | QTrie::QTrie() |
99 | { | 99 | { |
100 | key = 0; | 100 | key = 0; |
101 | isword = FALSE; | 101 | isword = FALSE; |
102 | } | 102 | } |
103 | 103 | ||
104 | QTrie::~QTrie() | 104 | QTrie::~QTrie() |
105 | { | 105 | { |
106 | // NOTE: we do not delete the children - after conversion to DAWG | 106 | // NOTE: we do not delete the children - after conversion to DAWG |
107 | // it's too difficult. The QTrie's are deleted via the directory. | 107 | // it's too difficult. The QTrie's are deleted via the directory. |
108 | } | 108 | } |
109 | 109 | ||
110 | void QTrie::insertWord(const QString& s, uint index) | 110 | void QTrie::insertWord(const QString& s, uint index) |
111 | { | 111 | { |
112 | if ( index == s.length() ) { | 112 | if ( index == s.length() ) { |
113 | isword = TRUE; | 113 | isword = TRUE; |
114 | } else { | 114 | } else { |
115 | QTrie* t = children.findAdd(s[index]); | 115 | QTrie* t = children.findAdd(s[index]); |
116 | t->insertWord(s,index+1); | 116 | t->insertWord(s,index+1); |
117 | } | 117 | } |
118 | } | 118 | } |
119 | 119 | ||
120 | bool QTrie::equal(QTrie* o) | 120 | bool QTrie::equal(QTrie* o) |
121 | { | 121 | { |
122 | if ( o == this ) return TRUE; | 122 | if ( o == this ) return TRUE; |
123 | if ( isword != o->isword ) | 123 | if ( isword != o->isword ) |
124 | return FALSE; | 124 | return FALSE; |
125 | return children.equal(o->children); | 125 | return children.equal(o->children); |
126 | } | 126 | } |
127 | 127 | ||
128 | void QTrie::dump(int indent) | 128 | void QTrie::dump(int indent) |
129 | { | 129 | { |
130 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { | 130 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { |
131 | QTrie* s = (*it).p; | 131 | QTrie* s = (*it).p; |
132 | for (int in=0; in<indent; in++) | 132 | for (int in=0; in<indent; in++) |
133 | fputc(' ',stderr); | 133 | fputc(' ',stderr); |
134 | fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(), | 134 | fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(), |
135 | s->key,s->isword?"word":"",s); | 135 | s->key,s->isword?"word":"",s); |
136 | s->dump(indent+2); | 136 | s->dump(indent+2); |
137 | } | 137 | } |
138 | } | 138 | } |
139 | 139 | ||
140 | void QTrie::distributeKeys(TrieClubDirectory& directory) | 140 | void QTrie::distributeKeys(TrieClubDirectory& directory) |
141 | { | 141 | { |
142 | maxdepth = INT_MIN; | 142 | maxdepth = INT_MIN; |
143 | decendants = children.count(); | 143 | decendants = children.count(); |
144 | key = 0; | 144 | key = 0; |
145 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { | 145 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { |
146 | QTrie* s = (*it).p; | 146 | QTrie* s = (*it).p; |
147 | QChar l = (*it).letter; | 147 | QChar l = (*it).letter; |
148 | s->distributeKeys(directory); | 148 | s->distributeKeys(directory); |
149 | key = key*64+l.unicode()+s->key*5; | 149 | key = key*64+l.unicode()+s->key*5; |
150 | decendants += s->decendants; | 150 | decendants += s->decendants; |
151 | if ( s->maxdepth+1 > maxdepth ) | 151 | if ( s->maxdepth+1 > maxdepth ) |
152 | maxdepth = s->maxdepth+1; | 152 | maxdepth = s->maxdepth+1; |
153 | } | 153 | } |
154 | if ( decendants ) { | 154 | if ( decendants ) { |
155 | key += decendants + maxdepth*256 + children.count() * 65536; | 155 | key += decendants + maxdepth*256 + children.count() * 65536; |
156 | if ( !key ) key++; // unlikely | 156 | if ( !key ) key++; // unlikely |
157 | } | 157 | } |
158 | TrieClub* c = directory[key]; | 158 | TrieClub* c = directory[key]; |
159 | if ( !c ) directory.insert(key, (c = new TrieClub) ); | 159 | if ( !c ) directory.insert(key, (c = new TrieClub) ); |
160 | c->prepend(this); | 160 | c->prepend(this); |
161 | } | 161 | } |
162 | 162 | ||
163 | QTrie* QTrie::clubLeader(TrieClubDirectory& directory) | 163 | QTrie* QTrie::clubLeader(TrieClubDirectory& directory) |
164 | { | 164 | { |
165 | if ( !key ) return directory[0]->first(); | 165 | if ( !key ) return directory[0]->first(); |
166 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { | 166 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { |
167 | QTrie* t= (*it).p->clubLeader(directory); | 167 | QTrie* t= (*it).p->clubLeader(directory); |
168 | (*it).p = t; | 168 | (*it).p = t; |
169 | } | 169 | } |
170 | TrieClub *club = directory[key]; | 170 | TrieClub *club = directory[key]; |
171 | for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { | 171 | for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { |
172 | QTrie* o = *it; | 172 | QTrie* o = *it; |
173 | if ( o->equal(this) ) | 173 | if ( o->equal(this) ) |
174 | return o; | 174 | return o; |
175 | } | 175 | } |
176 | return this; | 176 | return this; |
177 | } | 177 | } |
178 | 178 | ||
179 | int QTrie::collectKeys() | 179 | int QTrie::collectKeys() |
180 | { | 180 | { |
181 | int n=0; | 181 | int n=0; |
182 | if ( key ) key=0,n+=children.count(); | 182 | if ( key ) key=0,n+=children.count(); |
183 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) | 183 | for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) |
184 | n += (*it).p->collectKeys(); | 184 | n += (*it).p->collectKeys(); |
185 | return n; | 185 | return n; |
186 | } | 186 | } |
187 | 187 | ||
188 | int TriePtr::operator <(const TriePtr& o) const | 188 | int TriePtr::operator <(const TriePtr& o) const |
189 | { return letter < o.letter; } | 189 | { return letter < o.letter; } |
190 | int TriePtr::operator >(const TriePtr& o) const | 190 | int TriePtr::operator >(const TriePtr& o) const |
191 | { return letter > o.letter; } | 191 | { return letter > o.letter; } |
192 | int TriePtr::operator <=(const TriePtr& o) const | 192 | int TriePtr::operator <=(const TriePtr& o) const |
193 | { return letter <= o.letter; } | 193 | { return letter <= o.letter; } |
194 | 194 | ||
195 | bool TrieList::equal(TrieList& l) | 195 | bool TrieList::equal(TrieList& l) |
196 | { | 196 | { |
197 | if ( count() != l.count() ) | 197 | if ( count() != l.count() ) |
198 | return FALSE; | 198 | return FALSE; |
199 | sort(); l.sort(); | 199 | sort(); l.sort(); |
200 | ConstIterator it2 = begin(); | 200 | ConstIterator it2 = begin(); |
201 | ConstIterator it = l.begin(); | 201 | ConstIterator it = l.begin(); |
202 | for( ; it != l.end(); ++it, ++it2 ) | 202 | for( ; it != l.end(); ++it, ++it2 ) |
203 | if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) | 203 | if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) |
204 | return FALSE; | 204 | return FALSE; |
205 | return TRUE; | 205 | return TRUE; |
206 | } | 206 | } |
207 | QTrie* TrieList::findAdd(QChar c) | 207 | QTrie* TrieList::findAdd(QChar c) |
208 | { | 208 | { |
209 | for (Iterator it=begin(); it!=end(); ++it) { | 209 | for (Iterator it=begin(); it!=end(); ++it) { |
210 | if ( (*it).letter == c ) | 210 | if ( (*it).letter == c ) |
211 | return (*it).p; | 211 | return (*it).p; |
212 | } | 212 | } |
213 | TriePtr p; | 213 | TriePtr p; |
214 | p.p = new QTrie; | 214 | p.p = new QTrie; |
215 | p.letter = c; | 215 | p.letter = c; |
216 | prepend(p); | 216 | prepend(p); |
217 | sorted=FALSE; | 217 | sorted=FALSE; |
218 | sort(); | 218 | sort(); |
219 | return p.p; | 219 | return p.p; |
220 | } | 220 | } |
221 | 221 | ||
222 | static const char* dawg_sig = "QDAWG100"; | 222 | static const char* dawg_sig = "QDAWG100"; |
223 | 223 | ||
224 | class QDawgPrivate { | 224 | class QDawgPrivate { |
225 | public: | 225 | public: |
226 | QDawgPrivate(QIODevice* dev) | 226 | QDawgPrivate(QIODevice* dev) |
227 | { | 227 | { |
228 | QDataStream ds(dev); | 228 | QDataStream ds(dev); |
229 | char sig[8]; | 229 | char sig[8]; |
230 | ds.readRawBytes(sig,8); | 230 | ds.readRawBytes(sig,8); |
231 | if ( !strncmp(dawg_sig,sig,8) ) { | 231 | if ( !strncmp(dawg_sig,sig,8) ) { |
232 | uint n; | 232 | uint n; |
233 | char* nn; | 233 | char* nn; |
234 | ds.readBytes(nn,n); | 234 | ds.readBytes(nn,n); |
235 | 235 | ||
236 | // #### endianness problem ignored. | 236 | // #### endianness problem ignored. |
237 | node = (QDawg::Node*)nn; | 237 | node = (QDawg::Node*)nn; |
238 | nodes = n / sizeof(QDawg::Node); | 238 | nodes = n / sizeof(QDawg::Node); |
239 | } else { | 239 | } else { |
240 | node = 0; | 240 | node = 0; |
241 | } | 241 | } |
242 | } | 242 | } |
243 | 243 | ||
244 | bool ok() const { return node; } | 244 | bool ok() const { return node; } |
245 | 245 | ||
246 | QDawgPrivate(uchar* mem) | 246 | QDawgPrivate(uchar* mem) |
247 | { | 247 | { |
248 | if ( !strncmp(dawg_sig,(char*)mem,8) ) { | 248 | if ( !strncmp(dawg_sig,(char*)mem,8) ) { |
249 | mem += 8; | 249 | mem += 8; |
250 | 250 | ||
251 | int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3]; | 251 | int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3]; |
252 | mem += 4; | 252 | mem += 4; |
253 | 253 | ||
254 | // #### endianness problem ignored. | 254 | // #### endianness problem ignored. |
255 | node = (QDawg::Node*)((char*)mem); | 255 | node = (QDawg::Node*)((char*)mem); |
256 | nodes = n / sizeof(QDawg::Node); | 256 | nodes = n / sizeof(QDawg::Node); |
257 | } | 257 | } |
258 | } | 258 | } |
259 | 259 | ||
260 | QDawgPrivate(QTrie* t) // destroys the QTrie. | 260 | QDawgPrivate(QTrie* t) // destroys the QTrie. |
261 | { | 261 | { |
262 | TrieClubDirectory directory(9973); | 262 | TrieClubDirectory directory(9973); |
263 | t->distributeKeys(directory); | 263 | t->distributeKeys(directory); |
264 | QTrie* l = t->clubLeader(directory); | 264 | QTrie* l = t->clubLeader(directory); |
265 | ASSERT(l==t); | 265 | ASSERT(l==t); |
266 | generateArray(t); | 266 | generateArray(t); |
267 | 267 | ||
268 | TrieClub *club; | 268 | TrieClub *club; |
269 | for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit) | 269 | for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit) |
270 | { | 270 | { |
271 | for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { | 271 | for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { |
272 | delete *it; | 272 | delete *it; |
273 | } | 273 | } |
274 | delete club; | 274 | delete club; |
275 | } | 275 | } |
276 | } | 276 | } |
277 | 277 | ||
278 | bool write(QIODevice* dev) | 278 | bool write(QIODevice* dev) |
279 | { | 279 | { |
280 | QDataStream ds(dev); | 280 | QDataStream ds(dev); |
281 | ds.writeRawBytes(dawg_sig,8); | 281 | ds.writeRawBytes(dawg_sig,8); |
282 | // #### endianness problem ignored. | 282 | // #### endianness problem ignored. |
283 | ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes); | 283 | ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes); |
284 | return dev->state() == IO_Ok; | 284 | return dev->state() == IO_Ok; |
285 | } | 285 | } |
286 | 286 | ||
287 | void dumpWords(int nid=0, int index=0) | 287 | void dumpWords(int nid=0, int index=0) |
288 | { | 288 | { |
289 | static char word[256]; // ick latin1 | 289 | static char word[256]; // ick latin1 |
290 | int i=0; | 290 | int i=0; |
291 | do { | 291 | do { |
292 | QDawg::Node& n = node[nid+i]; | 292 | QDawg::Node& n = node[nid+i]; |
293 | word[index] = n.let; | 293 | word[index] = n.let; |
294 | if ( n.isword ) | 294 | if ( n.isword ) |
295 | fprintf(stderr,"%.*s\n",index+1,word); | 295 | fprintf(stderr,"%.*s\n",index+1,word); |
296 | if ( n.offset ) dumpWords(n.offset+nid+i,index+1); | 296 | if ( n.offset ) dumpWords(n.offset+nid+i,index+1); |
297 | } while (!node[nid+i++].islast); | 297 | } while (!node[nid+i++].islast); |
298 | } | 298 | } |
299 | 299 | ||
300 | void dump(int nid=0, int indent=0) | 300 | void dump(int nid=0, int indent=0) |
301 | { | 301 | { |
302 | int i=0; | 302 | int i=0; |
303 | do { | 303 | do { |
304 | QDawg::Node& n = node[nid+i]; | 304 | QDawg::Node& n = node[nid+i]; |
305 | fprintf(stderr,"%d: ",nid+i); | 305 | fprintf(stderr,"%d: ",nid+i); |
306 | for (int in=0; in<indent; in++) | 306 | for (int in=0; in<indent; in++) |
307 | fputc(' ',stderr); | 307 | fputc(' ',stderr); |
308 | fprintf(stderr," %c %d %d %d\n",n.let, | 308 | fprintf(stderr," %c %d %d %d\n",n.let, |
309 | n.isword,n.islast,n.offset); | 309 | n.isword,n.islast,n.offset); |
310 | if ( n.offset ) dump(n.offset+nid+i,indent+2); | 310 | if ( n.offset ) dump(n.offset+nid+i,indent+2); |
311 | } while (!node[nid+i++].islast); | 311 | } while (!node[nid+i++].islast); |
312 | } | 312 | } |
313 | 313 | ||
314 | int countWords(int nid=0) | 314 | int countWords(int nid=0) |
315 | { | 315 | { |
316 | int t=0; | 316 | int t=0; |
317 | int i=0; | 317 | int i=0; |
318 | do { | 318 | do { |
319 | QDawg::Node& n = node[nid+i]; | 319 | QDawg::Node& n = node[nid+i]; |
320 | if ( n.isword ) | 320 | if ( n.isword ) |
321 | t++; | 321 | t++; |
322 | if ( n.offset ) | 322 | if ( n.offset ) |
323 | t+=countWords(n.offset+nid+i); | 323 | t+=countWords(n.offset+nid+i); |
324 | } while (!node[nid+i++].islast); | 324 | } while (!node[nid+i++].islast); |
325 | return t; | 325 | return t; |
326 | } | 326 | } |
327 | 327 | ||
328 | bool contains(const QString& s, int nid=0, int index=0) const | 328 | bool contains(const QString& s, int nid=0, int index=0) const |
329 | { | 329 | { |
330 | int i=0; | 330 | int i=0; |
331 | do { | 331 | do { |
332 | QDawg::Node& n = node[nid+i]; | 332 | QDawg::Node& n = node[nid+i]; |
333 | if ( s[index] == QChar((ushort)n.let) ) { | 333 | if ( s[index] == QChar((ushort)n.let) ) { |
334 | if ( n.isword && index == (int)s.length()-1 ) | 334 | if ( n.isword && index == (int)s.length()-1 ) |
335 | return TRUE; | 335 | return TRUE; |
336 | if ( n.offset ) | 336 | if ( n.offset ) |
337 | return contains(s,n.offset+nid+i,index+1); | 337 | return contains(s,n.offset+nid+i,index+1); |
338 | } | 338 | } |
339 | } while (!node[nid+i++].islast); | 339 | } while (!node[nid+i++].islast); |
340 | return FALSE; | 340 | return FALSE; |
341 | } | 341 | } |
342 | 342 | ||
343 | void appendAllWords(QStringList& list, int nid=0, QString s="") const | 343 | void appendAllWords(QStringList& list, int nid=0, QString s="") const |
344 | { | 344 | { |
345 | int i=0; | 345 | int i=0; |
346 | int next = s.length(); | 346 | int next = s.length(); |
347 | do { | 347 | do { |
348 | QDawg::Node& n = node[nid+i]; | 348 | QDawg::Node& n = node[nid+i]; |
349 | s[next] = QChar((ushort)n.let); | 349 | s[next] = QChar((ushort)n.let); |
350 | if ( n.isword ) | 350 | if ( n.isword ) |
351 | list.append(s); | 351 | list.append(s); |
352 | if ( n.offset ) | 352 | if ( n.offset ) |
353 | appendAllWords(list, n.offset+nid+i, s); | 353 | appendAllWords(list, n.offset+nid+i, s); |
354 | } while (!node[nid+i++].islast); | 354 | } while (!node[nid+i++].islast); |
355 | } | 355 | } |
356 | 356 | ||
357 | const QDawg::Node* root() { return node; } | 357 | const QDawg::Node* root() { return node; } |
358 | 358 | ||
359 | private: | 359 | private: |
360 | void generateArray(QTrie* t) | 360 | void generateArray(QTrie* t) |
361 | { | 361 | { |
362 | nodes = 0; | 362 | nodes = 0; |
363 | int n = t->collectKeys(); | 363 | int n = t->collectKeys(); |
364 | node = new QDawg::Node[n]; | 364 | node = new QDawg::Node[n]; |
365 | appendToArray(t); | 365 | appendToArray(t); |
366 | ASSERT(n == nodes); | 366 | ASSERT(n == nodes); |
367 | } | 367 | } |
368 | 368 | ||
369 | int appendToArray(QTrie* t) | 369 | int appendToArray(QTrie* t) |
370 | { | 370 | { |
371 | if ( !t->key ) { | 371 | if ( !t->key ) { |
372 | if ( !t->children.count() ) | 372 | if ( !t->children.count() ) |
373 | return 0; | 373 | return 0; |
374 | t->key = nodes; | 374 | t->key = nodes; |
375 | nodes += t->children.count(); | 375 | nodes += t->children.count(); |
376 | QDawg::Node* n = &node[t->key-1]; | 376 | QDawg::Node* n = &node[t->key-1]; |
377 | int here = t->key; | 377 | int here = t->key; |
378 | for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { | 378 | for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { |
379 | QTrie* s = (*it).p; | 379 | QTrie* s = (*it).p; |
380 | ++n; | 380 | ++n; |
381 | n->let = (*it).letter.unicode(); | 381 | n->let = (*it).letter.unicode(); |
382 | n->isword = s->isword; | 382 | n->isword = s->isword; |
383 | n->islast = 0; | 383 | n->islast = 0; |
384 | n->offset = appendToArray(s); | 384 | n->offset = appendToArray(s); |
385 | if ( n->offset ) { | 385 | if ( n->offset ) { |
386 | int t = n->offset-here; | 386 | int t = n->offset-here; |
387 | n->offset=t; | 387 | n->offset=t; |
388 | if ( n->offset != t ) | 388 | if ( n->offset != t ) |
389 | qWarning("Overflow: too many words"); | 389 | qWarning("Overflow: too many words"); |
390 | } | 390 | } |
391 | here++; | 391 | here++; |
392 | } | 392 | } |
393 | n->islast = 1; | 393 | n->islast = 1; |
394 | } | 394 | } |
395 | return t->key; | 395 | return t->key; |
396 | } | 396 | } |
397 | 397 | ||
398 | private: | 398 | private: |
399 | int nodes; | 399 | int nodes; |
400 | QDawg::Node *node; | 400 | QDawg::Node *node; |
401 | }; | 401 | }; |
402 | 402 | ||
403 | /*! | ||
404 | \class QDawg qdawg.h | ||
405 | \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph. | ||
403 | 406 | ||
407 | A DAWG provides very fast look-up of words in a word list. | ||
408 | |||
409 | The word list is created using readFile(), read() or | ||
410 | createFromWords(). A list of all the DAWG's words is returned by | ||
411 | allWords(), and the total number of words is returned by | ||
412 | countWords(). Use contains() to see if a particular word is in the | ||
413 | DAWG. The root \link qdawg-node.html node\endlink is returned by root(). | ||
414 | |||
415 | A global DAWG is maintained for the current locale. See the | ||
416 | \l Global class for details. | ||
417 | |||
418 | The structure of a DAWG is a graph of \link qdawg-node.html | ||
419 | Nodes\endlink. There are no cycles in the graph (since there are no | ||
420 | inifinitely repeating words). Each \link qdawg-node.html | ||
421 | Node\endlink is a member of a list of \link qdawg-node.html | ||
422 | Nodes\endlink called a child list. Each \link qdawg-node.html | ||
423 | Node\endlink in the child list has a \e letter, an \e isWord flag, | ||
424 | at most one \e jump arc, and at most one arc to the next child in | ||
425 | the list. | ||
426 | |||
427 | If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG, | ||
428 | starting from the root(), and you concatenate all the letters from | ||
429 | the single child in each child list that you visit, at every \link | ||
430 | qdawg-node.html Node\endlink which has the isWord flag set your | ||
431 | concatenation will be a word in the list represented by the DAWG. | ||
432 | |||
433 | For example, the DAWG below represents the word list: | ||
434 | ban, band, can, cane, cans, pan, pane, pans. | ||
435 | |||
436 | This structuring not only provides O(1) lookup of words in the word list, | ||
437 | but also produces a smaller storage file than a plain text file word list. | ||
438 | |||
439 | \img qdawg.png | ||
440 | */ | ||
441 | |||
442 | /*! | ||
443 | Constructs a new empty DAWG. | ||
444 | */ | ||
404 | QDawg::QDawg() | 445 | QDawg::QDawg() |
405 | { | 446 | { |
406 | d = 0; | 447 | d = 0; |
407 | } | 448 | } |
408 | 449 | ||
450 | /*! | ||
451 | Deletes the DAWG. | ||
452 | */ | ||
409 | QDawg::~QDawg() | 453 | QDawg::~QDawg() |
410 | { | 454 | { |
411 | delete d; | 455 | delete d; |
412 | } | 456 | } |
413 | 457 | ||
458 | /*! | ||
459 | \overload | ||
460 | Replaces all the DAWG's words with words read from \a dev. | ||
461 | */ | ||
414 | bool QDawg::createFromWords(QIODevice* dev) | 462 | bool QDawg::createFromWords(QIODevice* dev) |
415 | { | 463 | { |
416 | delete d; | 464 | delete d; |
417 | 465 | ||
418 | QTextStream i(dev); | 466 | QTextStream i(dev); |
419 | QTrie* trie = new QTrie; | 467 | QTrie* trie = new QTrie; |
420 | int n=0; | 468 | int n=0; |
421 | while (!i.atEnd()) { | 469 | while (!i.atEnd()) { |
422 | trie->insertWord(QString::fromUtf8(i.readLine())); | 470 | trie->insertWord(QString::fromUtf8(i.readLine())); |
423 | n++; | 471 | n++; |
424 | } | 472 | } |
425 | if ( n ) | 473 | if ( n ) |
426 | d = new QDawgPrivate(trie); | 474 | d = new QDawgPrivate(trie); |
427 | else | 475 | else |
428 | d = 0; | 476 | d = 0; |
429 | return TRUE; | 477 | return TRUE; |
430 | } | 478 | } |
431 | 479 | ||
480 | /*! | ||
481 | Replaces all the DAWG's words with the words in the \a list. | ||
482 | */ | ||
432 | void QDawg::createFromWords(const QStringList& list) | 483 | void QDawg::createFromWords(const QStringList& list) |
433 | { | 484 | { |
434 | delete d; | 485 | delete d; |
435 | 486 | ||
436 | if ( list.count() ) { | 487 | if ( list.count() ) { |
437 | QTrie* trie = new QTrie; | 488 | QTrie* trie = new QTrie; |
438 | for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { | 489 | for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { |
439 | trie->insertWord(*it); | 490 | trie->insertWord(*it); |
440 | } | 491 | } |
441 | d = new QDawgPrivate(trie); | 492 | d = new QDawgPrivate(trie); |
442 | } else { | 493 | } else { |
443 | d = 0; | 494 | d = 0; |
444 | } | 495 | } |
445 | } | 496 | } |
446 | 497 | ||
498 | /*! | ||
499 | Returns a list of all the words in the DAWG. | ||
500 | */ | ||
447 | QStringList QDawg::allWords() const | 501 | QStringList QDawg::allWords() const |
448 | { | 502 | { |
449 | QStringList result; | 503 | QStringList result; |
450 | if ( d ) d->appendAllWords(result); | 504 | if ( d ) d->appendAllWords(result); |
451 | return result; | 505 | return result; |
452 | } | 506 | } |
453 | 507 | ||
454 | 508 | ||
509 | /*! | ||
510 | Replaces the DAWG with the DAWG in \a filename. | ||
511 | The file is memory-mapped. | ||
512 | |||
513 | \sa write() | ||
514 | */ | ||
455 | bool QDawg::readFile(const QString& filename) | 515 | bool QDawg::readFile(const QString& filename) |
456 | { | 516 | { |
457 | delete d; | 517 | delete d; |
458 | d = 0; | 518 | d = 0; |
459 | int f = ::open( QFile::encodeName(filename), O_RDONLY ); | 519 | int f = ::open( QFile::encodeName(filename), O_RDONLY ); |
460 | if ( f < 0 ) | 520 | if ( f < 0 ) |
461 | return FALSE; | 521 | return FALSE; |
462 | struct stat st; | 522 | struct stat st; |
463 | if ( !fstat( f, &st ) ) { | 523 | if ( !fstat( f, &st ) ) { |
464 | char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file | 524 | char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file |
465 | PROT_READ, // read-only memory | 525 | PROT_READ, // read-only memory |
466 | MAP_FILE | MAP_PRIVATE, // swap-backed map from file | 526 | MAP_FILE | MAP_PRIVATE, // swap-backed map from file |
467 | f, 0 ); // from offset 0 of f | 527 | f, 0 ); // from offset 0 of f |
468 | if ( tmp && tmp != (char*)MAP_FAILED ) | 528 | if ( tmp && tmp != (char*)MAP_FAILED ) |
469 | d = new QDawgPrivate((uchar*)tmp); | 529 | d = new QDawgPrivate((uchar*)tmp); |
470 | } | 530 | } |
471 | ::close( f ); | 531 | ::close( f ); |
472 | return d; | 532 | return d; |
473 | } | 533 | } |
474 | 534 | ||
535 | /*! | ||
536 | Replaces the DAWG with the DAWG in \a dev. | ||
537 | The file is memory-mapped. | ||
538 | |||
539 | \sa write() | ||
540 | */ | ||
475 | bool QDawg::read(QIODevice* dev) | 541 | bool QDawg::read(QIODevice* dev) |
476 | { | 542 | { |
477 | delete d; | 543 | delete d; |
478 | d = new QDawgPrivate(dev); | 544 | d = new QDawgPrivate(dev); |
479 | if ( d->ok() ) | 545 | if ( d->ok() ) |
480 | return TRUE; | 546 | return TRUE; |
481 | delete d; | 547 | delete d; |
482 | d = 0; | 548 | d = 0; |
483 | return FALSE; | 549 | return FALSE; |
484 | } | 550 | } |
485 | 551 | ||
552 | /*! | ||
553 | Writes the DAWG to \a dev, in a custom QDAWG format. | ||
554 | */ | ||
486 | bool QDawg::write(QIODevice* dev) const | 555 | bool QDawg::write(QIODevice* dev) const |
487 | { | 556 | { |
488 | return d ? d->write(dev) : TRUE; | 557 | return d ? d->write(dev) : TRUE; |
489 | } | 558 | } |
490 | 559 | ||
560 | /*! | ||
561 | Returns the number of words in the DAWG. | ||
562 | */ | ||
491 | int QDawg::countWords() const | 563 | int QDawg::countWords() const |
492 | { | 564 | { |
493 | return d ? d->countWords() : 0; | 565 | return d ? d->countWords() : 0; |
494 | } | 566 | } |
495 | 567 | ||
568 | /*! | ||
569 | Returns the root \link qdawg-node.html Node\endlink of the DAWG. | ||
570 | */ | ||
496 | const QDawg::Node* QDawg::root() const | 571 | const QDawg::Node* QDawg::root() const |
497 | { | 572 | { |
498 | return d ? d->root() : 0; | 573 | return d ? d->root() : 0; |
499 | } | 574 | } |
500 | 575 | ||
576 | /*! | ||
577 | Returns TRUE if the DAWG contains the word \a s; otherwise returns | ||
578 | FALSE. | ||
579 | */ | ||
501 | bool QDawg::contains(const QString& s) const | 580 | bool QDawg::contains(const QString& s) const |
502 | { | 581 | { |
503 | return d ? d->contains(s) : FALSE; | 582 | return d ? d->contains(s) : FALSE; |
504 | } | 583 | } |
505 | 584 | ||
585 | /*! | ||
586 | \internal | ||
587 | |||
588 | For debugging: prints out the DAWG contents. | ||
589 | */ | ||
506 | void QDawg::dump() const | 590 | void QDawg::dump() const |
507 | { | 591 | { |
508 | if ( d ) d->dump(); | 592 | if ( d ) d->dump(); |
509 | } | 593 | } |
510 | 594 | ||
595 | /*! | ||
596 | \class QDawg::Node qdawg.h | ||
597 | \brief The QDawg::Node class represents one node of a QDawg. | ||
598 | */ | ||
599 | |||
600 | /*! | ||
601 | \fn QChar QDawg::Node::letter() const | ||
602 | |||
603 | Returns this Node's letter. | ||
604 | */ | ||
605 | /*! | ||
606 | \fn bool QDawg::Node::isWord() const | ||
607 | |||
608 | Returns TRUE if this Node is the end of a word; otherwise returns | ||
609 | FALSE. | ||
610 | */ | ||
611 | /*! | ||
612 | \fn bool QDawg::Node::isLast() const | ||
613 | |||
614 | Returns TRUE if this Node is the last in the child list; otherwise | ||
615 | returns FALSE. | ||
616 | */ | ||
617 | /*! | ||
618 | \fn const Node* QDawg::Node::next() const | ||
619 | |||
620 | Returns the next child Node in the child list or 0 if the current | ||
621 | Node isLast(). | ||
622 | */ | ||
623 | /*! | ||
624 | \fn const Node* QDawg::Node::jump() const | ||
625 | |||
626 | Returns the node connected to this Node. | ||
627 | */ | ||