summaryrefslogtreecommitdiff
path: root/library/qdawg.cpp
Unidiff
Diffstat (limited to 'library/qdawg.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--library/qdawg.cpp2
1 files changed, 0 insertions, 2 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp
index af5dc82..2ea5734 100644
--- a/library/qdawg.cpp
+++ b/library/qdawg.cpp
@@ -1,627 +1,625 @@
1/********************************************************************** 1/**********************************************************************
2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved. 2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
3** 3**
4** This file is part of the Qtopia Environment. 4** This file is part of the Qtopia Environment.
5** 5**
6** This file may be distributed and/or modified under the terms of the 6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software 7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the 8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file. 9** packaging of this file.
10** 10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE 11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13** 13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information. 14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15** 15**
16** Contact info@trolltech.com if any conditions of this licensing are 16** Contact info@trolltech.com if any conditions of this licensing are
17** not clear to you. 17** not clear to you.
18** 18**
19**********************************************************************/ 19**********************************************************************/
20#include "qdawg.h" 20#include "qdawg.h"
21#include <qintdict.h> 21#include <qintdict.h>
22#include <qvaluelist.h>
23#include <qtextstream.h>
24#include <qfile.h> 22#include <qfile.h>
25#include <qtl.h> 23#include <qtl.h>
26 24
27#include <limits.h> 25#include <limits.h>
28#include <stdio.h> 26#include <stdio.h>
29 27
30// for mmap 28// for mmap
31#include <sys/types.h> 29#include <sys/types.h>
32#include <sys/stat.h> 30#include <sys/stat.h>
33#include <sys/mman.h> 31#include <sys/mman.h>
34#include <fcntl.h> 32#include <fcntl.h>
35#include <errno.h> 33#include <errno.h>
36#include <unistd.h> 34#include <unistd.h>
37 35
38class QDawgPrivate; 36class QDawgPrivate;
39class QTrie; 37class QTrie;
40 38
41typedef QValueList<QTrie*> TrieClub; 39typedef QValueList<QTrie*> TrieClub;
42typedef QIntDict<TrieClub> TrieClubDirectory; 40typedef QIntDict<TrieClub> TrieClubDirectory;
43 41
44class TriePtr { 42class TriePtr {
45public: 43public:
46 QChar letter; 44 QChar letter;
47 QTrie* p; 45 QTrie* p;
48 int operator <(const TriePtr& o) const; 46 int operator <(const TriePtr& o) const;
49 int operator >(const TriePtr& o) const; 47 int operator >(const TriePtr& o) const;
50 int operator <=(const TriePtr& o) const; 48 int operator <=(const TriePtr& o) const;
51}; 49};
52 50
53class TrieList : public QValueList<TriePtr> { 51class TrieList : public QValueList<TriePtr> {
54 bool sorted; 52 bool sorted;
55public: 53public:
56 TrieList() 54 TrieList()
57 { 55 {
58 sorted=TRUE; 56 sorted=TRUE;
59 } 57 }
60 58
61 QTrie* findAdd(QChar c); 59 QTrie* findAdd(QChar c);
62 bool equal(TrieList& l); 60 bool equal(TrieList& l);
63 61
64 void sort() 62 void sort()
65 { 63 {
66 if ( !sorted ) { 64 if ( !sorted ) {
67 qHeapSort(*this); 65 qHeapSort(*this);
68 sorted = TRUE; 66 sorted = TRUE;
69 } 67 }
70 } 68 }
71}; 69};
72 70
73// A fast but memory-wasting temporary class. The Dawg is the goal. 71// A fast but memory-wasting temporary class. The Dawg is the goal.
74class QTrie { 72class QTrie {
75public: 73public:
76 QTrie(); 74 QTrie();
77 ~QTrie(); 75 ~QTrie();
78 76
79 void insertWord(const QString& s, uint index=0); 77 void insertWord(const QString& s, uint index=0);
80 bool equal(QTrie* o); 78 bool equal(QTrie* o);
81 void dump(int indent=0); 79 void dump(int indent=0);
82 80
83private: 81private:
84 TrieList children; 82 TrieList children;
85 bool isword; 83 bool isword;
86 84
87 friend class QDawgPrivate; 85 friend class QDawgPrivate;
88 int maxdepth; 86 int maxdepth;
89 int decendants; 87 int decendants;
90 int key; 88 int key;
91 void distributeKeys(TrieClubDirectory& directory); 89 void distributeKeys(TrieClubDirectory& directory);
92 QTrie* clubLeader(TrieClubDirectory& directory); 90 QTrie* clubLeader(TrieClubDirectory& directory);
93 int collectKeys(); 91 int collectKeys();
94 friend class TriePtr; 92 friend class TriePtr;
95 friend class TrieList; 93 friend class TrieList;
96}; 94};
97 95
98QTrie::QTrie() 96QTrie::QTrie()
99{ 97{
100 key = 0; 98 key = 0;
101 isword = FALSE; 99 isword = FALSE;
102} 100}
103 101
104QTrie::~QTrie() 102QTrie::~QTrie()
105{ 103{
106 // NOTE: we do not delete the children - after conversion to DAWG 104 // NOTE: we do not delete the children - after conversion to DAWG
107 // it's too difficult. The QTrie's are deleted via the directory. 105 // it's too difficult. The QTrie's are deleted via the directory.
108} 106}
109 107
110void QTrie::insertWord(const QString& s, uint index) 108void QTrie::insertWord(const QString& s, uint index)
111{ 109{
112 if ( index == s.length() ) { 110 if ( index == s.length() ) {
113 isword = TRUE; 111 isword = TRUE;
114 } else { 112 } else {
115 QTrie* t = children.findAdd(s[index]); 113 QTrie* t = children.findAdd(s[index]);
116 t->insertWord(s,index+1); 114 t->insertWord(s,index+1);
117 } 115 }
118} 116}
119 117
120bool QTrie::equal(QTrie* o) 118bool QTrie::equal(QTrie* o)
121{ 119{
122 if ( o == this ) return TRUE; 120 if ( o == this ) return TRUE;
123 if ( isword != o->isword ) 121 if ( isword != o->isword )
124 return FALSE; 122 return FALSE;
125 return children.equal(o->children); 123 return children.equal(o->children);
126} 124}
127 125
128void QTrie::dump(int indent) 126void QTrie::dump(int indent)
129{ 127{
130 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 128 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
131 QTrie* s = (*it).p; 129 QTrie* s = (*it).p;
132 for (int in=0; in<indent; in++) 130 for (int in=0; in<indent; in++)
133 fputc(' ',stderr); 131 fputc(' ',stderr);
134 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(), 132 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(),
135 s->key,s->isword?"word":"",s); 133 s->key,s->isword?"word":"",s);
136 s->dump(indent+2); 134 s->dump(indent+2);
137 } 135 }
138} 136}
139 137
140void QTrie::distributeKeys(TrieClubDirectory& directory) 138void QTrie::distributeKeys(TrieClubDirectory& directory)
141{ 139{
142 maxdepth = INT_MIN; 140 maxdepth = INT_MIN;
143 decendants = children.count(); 141 decendants = children.count();
144 key = 0; 142 key = 0;
145 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 143 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
146 QTrie* s = (*it).p; 144 QTrie* s = (*it).p;
147 QChar l = (*it).letter; 145 QChar l = (*it).letter;
148 s->distributeKeys(directory); 146 s->distributeKeys(directory);
149 key = key*64+l.unicode()+s->key*5; 147 key = key*64+l.unicode()+s->key*5;
150 decendants += s->decendants; 148 decendants += s->decendants;
151 if ( s->maxdepth+1 > maxdepth ) 149 if ( s->maxdepth+1 > maxdepth )
152 maxdepth = s->maxdepth+1; 150 maxdepth = s->maxdepth+1;
153 } 151 }
154 if ( decendants ) { 152 if ( decendants ) {
155 key += decendants + maxdepth*256 + children.count() * 65536; 153 key += decendants + maxdepth*256 + children.count() * 65536;
156 if ( !key ) key++; // unlikely 154 if ( !key ) key++; // unlikely
157 } 155 }
158 TrieClub* c = directory[key]; 156 TrieClub* c = directory[key];
159 if ( !c ) directory.insert(key, (c = new TrieClub) ); 157 if ( !c ) directory.insert(key, (c = new TrieClub) );
160 c->prepend(this); 158 c->prepend(this);
161} 159}
162 160
163QTrie* QTrie::clubLeader(TrieClubDirectory& directory) 161QTrie* QTrie::clubLeader(TrieClubDirectory& directory)
164{ 162{
165 if ( !key ) return directory[0]->first(); 163 if ( !key ) return directory[0]->first();
166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 164 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
167 QTrie* t= (*it).p->clubLeader(directory); 165 QTrie* t= (*it).p->clubLeader(directory);
168 (*it).p = t; 166 (*it).p = t;
169 } 167 }
170 TrieClub *club = directory[key]; 168 TrieClub *club = directory[key];
171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 169 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
172 QTrie* o = *it; 170 QTrie* o = *it;
173 if ( o->equal(this) ) 171 if ( o->equal(this) )
174 return o; 172 return o;
175 } 173 }
176 return this; 174 return this;
177} 175}
178 176
179int QTrie::collectKeys() 177int QTrie::collectKeys()
180{ 178{
181 int n=0; 179 int n=0;
182 if ( key ) key=0,n+=children.count(); 180 if ( key ) key=0,n+=children.count();
183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) 181 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it)
184 n += (*it).p->collectKeys(); 182 n += (*it).p->collectKeys();
185 return n; 183 return n;
186} 184}
187 185
188int TriePtr::operator <(const TriePtr& o) const 186int TriePtr::operator <(const TriePtr& o) const
189 { return letter < o.letter; } 187 { return letter < o.letter; }
190int TriePtr::operator >(const TriePtr& o) const 188int TriePtr::operator >(const TriePtr& o) const
191 { return letter > o.letter; } 189 { return letter > o.letter; }
192int TriePtr::operator <=(const TriePtr& o) const 190int TriePtr::operator <=(const TriePtr& o) const
193 { return letter <= o.letter; } 191 { return letter <= o.letter; }
194 192
195bool TrieList::equal(TrieList& l) 193bool TrieList::equal(TrieList& l)
196{ 194{
197 if ( count() != l.count() ) 195 if ( count() != l.count() )
198 return FALSE; 196 return FALSE;
199 sort(); l.sort(); 197 sort(); l.sort();
200 ConstIterator it2 = begin(); 198 ConstIterator it2 = begin();
201 ConstIterator it = l.begin(); 199 ConstIterator it = l.begin();
202 for( ; it != l.end(); ++it, ++it2 ) 200 for( ; it != l.end(); ++it, ++it2 )
203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) 201 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) )
204 return FALSE; 202 return FALSE;
205 return TRUE; 203 return TRUE;
206} 204}
207QTrie* TrieList::findAdd(QChar c) 205QTrie* TrieList::findAdd(QChar c)
208{ 206{
209 for (Iterator it=begin(); it!=end(); ++it) { 207 for (Iterator it=begin(); it!=end(); ++it) {
210 if ( (*it).letter == c ) 208 if ( (*it).letter == c )
211 return (*it).p; 209 return (*it).p;
212 } 210 }
213 TriePtr p; 211 TriePtr p;
214 p.p = new QTrie; 212 p.p = new QTrie;
215 p.letter = c; 213 p.letter = c;
216 prepend(p); 214 prepend(p);
217 sorted=FALSE; 215 sorted=FALSE;
218 sort(); 216 sort();
219 return p.p; 217 return p.p;
220} 218}
221 219
222static const char* dawg_sig = "QDAWG100"; 220static const char* dawg_sig = "QDAWG100";
223 221
224class QDawgPrivate { 222class QDawgPrivate {
225public: 223public:
226 QDawgPrivate(QIODevice* dev) 224 QDawgPrivate(QIODevice* dev)
227 { 225 {
228 QDataStream ds(dev); 226 QDataStream ds(dev);
229 char sig[8]; 227 char sig[8];
230 ds.readRawBytes(sig,8); 228 ds.readRawBytes(sig,8);
231 if ( !strncmp(dawg_sig,sig,8) ) { 229 if ( !strncmp(dawg_sig,sig,8) ) {
232 uint n; 230 uint n;
233 char* nn; 231 char* nn;
234 ds.readBytes(nn,n); 232 ds.readBytes(nn,n);
235 233
236 // #### endianness problem ignored. 234 // #### endianness problem ignored.
237 node = (QDawg::Node*)nn; 235 node = (QDawg::Node*)nn;
238 nodes = n / sizeof(QDawg::Node); 236 nodes = n / sizeof(QDawg::Node);
239 } else { 237 } else {
240 node = 0; 238 node = 0;
241 } 239 }
242 } 240 }
243 241
244 bool ok() const { return node; } 242 bool ok() const { return node; }
245 243
246 QDawgPrivate(uchar* mem) 244 QDawgPrivate(uchar* mem)
247 { 245 {
248 if ( !strncmp(dawg_sig,(char*)mem,8) ) { 246 if ( !strncmp(dawg_sig,(char*)mem,8) ) {
249 mem += 8; 247 mem += 8;
250 248
251 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3]; 249 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3];
252 mem += 4; 250 mem += 4;
253 251
254 // #### endianness problem ignored. 252 // #### endianness problem ignored.
255 node = (QDawg::Node*)((char*)mem); 253 node = (QDawg::Node*)((char*)mem);
256 nodes = n / sizeof(QDawg::Node); 254 nodes = n / sizeof(QDawg::Node);
257 } 255 }
258 } 256 }
259 257
260 QDawgPrivate(QTrie* t) // destroys the QTrie. 258 QDawgPrivate(QTrie* t) // destroys the QTrie.
261 { 259 {
262 TrieClubDirectory directory(9973); 260 TrieClubDirectory directory(9973);
263 t->distributeKeys(directory); 261 t->distributeKeys(directory);
264 QTrie* l = t->clubLeader(directory); 262 QTrie* l = t->clubLeader(directory);
265 ASSERT(l==t); 263 ASSERT(l==t);
266 generateArray(t); 264 generateArray(t);
267 265
268 TrieClub *club; 266 TrieClub *club;
269 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit) 267 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit)
270 { 268 {
271 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 269 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
272 delete *it; 270 delete *it;
273 } 271 }
274 delete club; 272 delete club;
275 } 273 }
276 } 274 }
277 275
278 bool write(QIODevice* dev) 276 bool write(QIODevice* dev)
279 { 277 {
280 QDataStream ds(dev); 278 QDataStream ds(dev);
281 ds.writeRawBytes(dawg_sig,8); 279 ds.writeRawBytes(dawg_sig,8);
282 // #### endianness problem ignored. 280 // #### endianness problem ignored.
283 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes); 281 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes);
284 return dev->state() == IO_Ok; 282 return dev->state() == IO_Ok;
285 } 283 }
286 284
287 void dumpWords(int nid=0, int index=0) 285 void dumpWords(int nid=0, int index=0)
288 { 286 {
289 static char word[256]; // ick latin1 287 static char word[256]; // ick latin1
290 int i=0; 288 int i=0;
291 do { 289 do {
292 QDawg::Node& n = node[nid+i]; 290 QDawg::Node& n = node[nid+i];
293 word[index] = n.let; 291 word[index] = n.let;
294 if ( n.isword ) 292 if ( n.isword )
295 fprintf(stderr,"%.*s\n",index+1,word); 293 fprintf(stderr,"%.*s\n",index+1,word);
296 if ( n.offset ) dumpWords(n.offset+nid+i,index+1); 294 if ( n.offset ) dumpWords(n.offset+nid+i,index+1);
297 } while (!node[nid+i++].islast); 295 } while (!node[nid+i++].islast);
298 } 296 }
299 297
300 void dump(int nid=0, int indent=0) 298 void dump(int nid=0, int indent=0)
301 { 299 {
302 int i=0; 300 int i=0;
303 do { 301 do {
304 QDawg::Node& n = node[nid+i]; 302 QDawg::Node& n = node[nid+i];
305 fprintf(stderr,"%d: ",nid+i); 303 fprintf(stderr,"%d: ",nid+i);
306 for (int in=0; in<indent; in++) 304 for (int in=0; in<indent; in++)
307 fputc(' ',stderr); 305 fputc(' ',stderr);
308 fprintf(stderr," %c %d %d %d\n",n.let, 306 fprintf(stderr," %c %d %d %d\n",n.let,
309 n.isword,n.islast,n.offset); 307 n.isword,n.islast,n.offset);
310 if ( n.offset ) dump(n.offset+nid+i,indent+2); 308 if ( n.offset ) dump(n.offset+nid+i,indent+2);
311 } while (!node[nid+i++].islast); 309 } while (!node[nid+i++].islast);
312 } 310 }
313 311
314 int countWords(int nid=0) 312 int countWords(int nid=0)
315 { 313 {
316 int t=0; 314 int t=0;
317 int i=0; 315 int i=0;
318 do { 316 do {
319 QDawg::Node& n = node[nid+i]; 317 QDawg::Node& n = node[nid+i];
320 if ( n.isword ) 318 if ( n.isword )
321 t++; 319 t++;
322 if ( n.offset ) 320 if ( n.offset )
323 t+=countWords(n.offset+nid+i); 321 t+=countWords(n.offset+nid+i);
324 } while (!node[nid+i++].islast); 322 } while (!node[nid+i++].islast);
325 return t; 323 return t;
326 } 324 }
327 325
328 bool contains(const QString& s, int nid=0, int index=0) const 326 bool contains(const QString& s, int nid=0, int index=0) const
329 { 327 {
330 int i=0; 328 int i=0;
331 do { 329 do {
332 QDawg::Node& n = node[nid+i]; 330 QDawg::Node& n = node[nid+i];
333 if ( s[index] == QChar((ushort)n.let) ) { 331 if ( s[index] == QChar((ushort)n.let) ) {
334 if ( n.isword && index == (int)s.length()-1 ) 332 if ( n.isword && index == (int)s.length()-1 )
335 return TRUE; 333 return TRUE;
336 if ( n.offset ) 334 if ( n.offset )
337 return contains(s,n.offset+nid+i,index+1); 335 return contains(s,n.offset+nid+i,index+1);
338 } 336 }
339 } while (!node[nid+i++].islast); 337 } while (!node[nid+i++].islast);
340 return FALSE; 338 return FALSE;
341 } 339 }
342 340
343 void appendAllWords(QStringList& list, int nid=0, QString s="") const 341 void appendAllWords(QStringList& list, int nid=0, QString s="") const
344 { 342 {
345 int i=0; 343 int i=0;
346 int next = s.length(); 344 int next = s.length();
347 do { 345 do {
348 QDawg::Node& n = node[nid+i]; 346 QDawg::Node& n = node[nid+i];
349 s[next] = QChar((ushort)n.let); 347 s[next] = QChar((ushort)n.let);
350 if ( n.isword ) 348 if ( n.isword )
351 list.append(s); 349 list.append(s);
352 if ( n.offset ) 350 if ( n.offset )
353 appendAllWords(list, n.offset+nid+i, s); 351 appendAllWords(list, n.offset+nid+i, s);
354 } while (!node[nid+i++].islast); 352 } while (!node[nid+i++].islast);
355 } 353 }
356 354
357 const QDawg::Node* root() { return node; } 355 const QDawg::Node* root() { return node; }
358 356
359private: 357private:
360 void generateArray(QTrie* t) 358 void generateArray(QTrie* t)
361 { 359 {
362 nodes = 0; 360 nodes = 0;
363 int n = t->collectKeys(); 361 int n = t->collectKeys();
364 node = new QDawg::Node[n]; 362 node = new QDawg::Node[n];
365 appendToArray(t); 363 appendToArray(t);
366 ASSERT(n == nodes); 364 ASSERT(n == nodes);
367 } 365 }
368 366
369 int appendToArray(QTrie* t) 367 int appendToArray(QTrie* t)
370 { 368 {
371 if ( !t->key ) { 369 if ( !t->key ) {
372 if ( !t->children.count() ) 370 if ( !t->children.count() )
373 return 0; 371 return 0;
374 t->key = nodes; 372 t->key = nodes;
375 nodes += t->children.count(); 373 nodes += t->children.count();
376 QDawg::Node* n = &node[t->key-1]; 374 QDawg::Node* n = &node[t->key-1];
377 int here = t->key; 375 int here = t->key;
378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { 376 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) {
379 QTrie* s = (*it).p; 377 QTrie* s = (*it).p;
380 ++n; 378 ++n;
381 n->let = (*it).letter.unicode(); 379 n->let = (*it).letter.unicode();
382 n->isword = s->isword; 380 n->isword = s->isword;
383 n->islast = 0; 381 n->islast = 0;
384 n->offset = appendToArray(s); 382 n->offset = appendToArray(s);
385 if ( n->offset ) { 383 if ( n->offset ) {
386 int t = n->offset-here; 384 int t = n->offset-here;
387 n->offset=t; 385 n->offset=t;
388 if ( n->offset != t ) 386 if ( n->offset != t )
389 qWarning("Overflow: too many words"); 387 qWarning("Overflow: too many words");
390 } 388 }
391 here++; 389 here++;
392 } 390 }
393 n->islast = 1; 391 n->islast = 1;
394 } 392 }
395 return t->key; 393 return t->key;
396 } 394 }
397 395
398private: 396private:
399 int nodes; 397 int nodes;
400 QDawg::Node *node; 398 QDawg::Node *node;
401}; 399};
402 400
403/*! 401/*!
404 \class QDawg qdawg.h 402 \class QDawg qdawg.h
405 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph. 403 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph.
406 404
407 A DAWG provides very fast look-up of words in a word list. 405 A DAWG provides very fast look-up of words in a word list.
408 406
409 The word list is created using readFile(), read() or 407 The word list is created using readFile(), read() or
410 createFromWords(). A list of all the DAWG's words is returned by 408 createFromWords(). A list of all the DAWG's words is returned by
411 allWords(), and the total number of words is returned by 409 allWords(), and the total number of words is returned by
412 countWords(). Use contains() to see if a particular word is in the 410 countWords(). Use contains() to see if a particular word is in the
413 DAWG. The root \link qdawg-node.html node\endlink is returned by root(). 411 DAWG. The root \link qdawg-node.html node\endlink is returned by root().
414 412
415 A global DAWG is maintained for the current locale. See the 413 A global DAWG is maintained for the current locale. See the
416 \l Global class for details. 414 \l Global class for details.
417 415
418 The structure of a DAWG is a graph of \link qdawg-node.html 416 The structure of a DAWG is a graph of \link qdawg-node.html
419 Nodes\endlink. There are no cycles in the graph (since there are no 417 Nodes\endlink. There are no cycles in the graph (since there are no
420 inifinitely repeating words). Each \link qdawg-node.html 418 inifinitely repeating words). Each \link qdawg-node.html
421 Node\endlink is a member of a list of \link qdawg-node.html 419 Node\endlink is a member of a list of \link qdawg-node.html
422 Nodes\endlink called a child list. Each \link qdawg-node.html 420 Nodes\endlink called a child list. Each \link qdawg-node.html
423 Node\endlink in the child list has a \e letter, an \e isWord flag, 421 Node\endlink in the child list has a \e letter, an \e isWord flag,
424 at most one \e jump arc, and at most one arc to the next child in 422 at most one \e jump arc, and at most one arc to the next child in
425 the list. 423 the list.
426 424
427 If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG, 425 If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG,
428 starting from the root(), and you concatenate all the letters from 426 starting from the root(), and you concatenate all the letters from
429 the single child in each child list that you visit, at every \link 427 the single child in each child list that you visit, at every \link
430 qdawg-node.html Node\endlink which has the isWord flag set your 428 qdawg-node.html Node\endlink which has the isWord flag set your
431 concatenation will be a word in the list represented by the DAWG. 429 concatenation will be a word in the list represented by the DAWG.
432 430
433 For example, the DAWG below represents the word list: 431 For example, the DAWG below represents the word list:
434 ban, band, can, cane, cans, pan, pane, pans. 432 ban, band, can, cane, cans, pan, pane, pans.
435 433
436 This structuring not only provides O(1) lookup of words in the word list, 434 This structuring not only provides O(1) lookup of words in the word list,
437 but also produces a smaller storage file than a plain text file word list. 435 but also produces a smaller storage file than a plain text file word list.
438 436
439 \img qdawg.png 437 \img qdawg.png
440*/ 438*/
441 439
442/*! 440/*!
443 Constructs a new empty DAWG. 441 Constructs a new empty DAWG.
444*/ 442*/
445QDawg::QDawg() 443QDawg::QDawg()
446{ 444{
447 d = 0; 445 d = 0;
448} 446}
449 447
450/*! 448/*!
451 Deletes the DAWG. 449 Deletes the DAWG.
452*/ 450*/
453QDawg::~QDawg() 451QDawg::~QDawg()
454{ 452{
455 delete d; 453 delete d;
456} 454}
457 455
458/*! 456/*!
459 \overload 457 \overload
460 Replaces all the DAWG's words with words read from \a dev. 458 Replaces all the DAWG's words with words read from \a dev.
461*/ 459*/
462bool QDawg::createFromWords(QIODevice* dev) 460bool QDawg::createFromWords(QIODevice* dev)
463{ 461{
464 delete d; 462 delete d;
465 463
466 QTextStream i(dev); 464 QTextStream i(dev);
467 QTrie* trie = new QTrie; 465 QTrie* trie = new QTrie;
468 int n=0; 466 int n=0;
469 while (!i.atEnd()) { 467 while (!i.atEnd()) {
470 trie->insertWord(QString::fromUtf8(i.readLine())); 468 trie->insertWord(QString::fromUtf8(i.readLine()));
471 n++; 469 n++;
472 } 470 }
473 if ( n ) 471 if ( n )
474 d = new QDawgPrivate(trie); 472 d = new QDawgPrivate(trie);
475 else 473 else
476 d = 0; 474 d = 0;
477 return TRUE; 475 return TRUE;
478} 476}
479 477
480/*! 478/*!
481 Replaces all the DAWG's words with the words in the \a list. 479 Replaces all the DAWG's words with the words in the \a list.
482*/ 480*/
483void QDawg::createFromWords(const QStringList& list) 481void QDawg::createFromWords(const QStringList& list)
484{ 482{
485 delete d; 483 delete d;
486 484
487 if ( list.count() ) { 485 if ( list.count() ) {
488 QTrie* trie = new QTrie; 486 QTrie* trie = new QTrie;
489 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { 487 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) {
490 trie->insertWord(*it); 488 trie->insertWord(*it);
491 } 489 }
492 d = new QDawgPrivate(trie); 490 d = new QDawgPrivate(trie);
493 } else { 491 } else {
494 d = 0; 492 d = 0;
495 } 493 }
496} 494}
497 495
498/*! 496/*!
499 Returns a list of all the words in the DAWG. 497 Returns a list of all the words in the DAWG.
500*/ 498*/
501QStringList QDawg::allWords() const 499QStringList QDawg::allWords() const
502{ 500{
503 QStringList result; 501 QStringList result;
504 if ( d ) d->appendAllWords(result); 502 if ( d ) d->appendAllWords(result);
505 return result; 503 return result;
506} 504}
507 505
508 506
509/*! 507/*!
510 Replaces the DAWG with the DAWG in \a filename. 508 Replaces the DAWG with the DAWG in \a filename.
511 The file is memory-mapped. 509 The file is memory-mapped.
512 510
513 \sa write() 511 \sa write()
514*/ 512*/
515bool QDawg::readFile(const QString& filename) 513bool QDawg::readFile(const QString& filename)
516{ 514{
517 delete d; 515 delete d;
518 d = 0; 516 d = 0;
519 int f = ::open( QFile::encodeName(filename), O_RDONLY ); 517 int f = ::open( QFile::encodeName(filename), O_RDONLY );
520 if ( f < 0 ) 518 if ( f < 0 )
521 return FALSE; 519 return FALSE;
522 struct stat st; 520 struct stat st;
523 if ( !fstat( f, &st ) ) { 521 if ( !fstat( f, &st ) ) {
524 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file 522 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file
525 PROT_READ, // read-only memory 523 PROT_READ, // read-only memory
526 MAP_FILE | MAP_PRIVATE, // swap-backed map from file 524 MAP_FILE | MAP_PRIVATE, // swap-backed map from file
527 f, 0 ); // from offset 0 of f 525 f, 0 ); // from offset 0 of f
528 if ( tmp && tmp != (char*)MAP_FAILED ) 526 if ( tmp && tmp != (char*)MAP_FAILED )
529 d = new QDawgPrivate((uchar*)tmp); 527 d = new QDawgPrivate((uchar*)tmp);
530 } 528 }
531 ::close( f ); 529 ::close( f );
532 return d; 530 return d;
533} 531}
534 532
535/*! 533/*!
536 Replaces the DAWG with the DAWG in \a dev. 534 Replaces the DAWG with the DAWG in \a dev.
537 The file is memory-mapped. 535 The file is memory-mapped.
538 536
539 \sa write() 537 \sa write()
540*/ 538*/
541bool QDawg::read(QIODevice* dev) 539bool QDawg::read(QIODevice* dev)
542{ 540{
543 delete d; 541 delete d;
544 d = new QDawgPrivate(dev); 542 d = new QDawgPrivate(dev);
545 if ( d->ok() ) 543 if ( d->ok() )
546 return TRUE; 544 return TRUE;
547 delete d; 545 delete d;
548 d = 0; 546 d = 0;
549 return FALSE; 547 return FALSE;
550} 548}
551 549
552/*! 550/*!
553 Writes the DAWG to \a dev, in a custom QDAWG format. 551 Writes the DAWG to \a dev, in a custom QDAWG format.
554*/ 552*/
555bool QDawg::write(QIODevice* dev) const 553bool QDawg::write(QIODevice* dev) const
556{ 554{
557 return d ? d->write(dev) : TRUE; 555 return d ? d->write(dev) : TRUE;
558} 556}
559 557
560/*! 558/*!
561 Returns the number of words in the DAWG. 559 Returns the number of words in the DAWG.
562*/ 560*/
563int QDawg::countWords() const 561int QDawg::countWords() const
564{ 562{
565 return d ? d->countWords() : 0; 563 return d ? d->countWords() : 0;
566} 564}
567 565
568/*! 566/*!
569 Returns the root \link qdawg-node.html Node\endlink of the DAWG. 567 Returns the root \link qdawg-node.html Node\endlink of the DAWG.
570*/ 568*/
571const QDawg::Node* QDawg::root() const 569const QDawg::Node* QDawg::root() const
572{ 570{
573 return d ? d->root() : 0; 571 return d ? d->root() : 0;
574} 572}
575 573
576/*! 574/*!
577 Returns TRUE if the DAWG contains the word \a s; otherwise returns 575 Returns TRUE if the DAWG contains the word \a s; otherwise returns
578 FALSE. 576 FALSE.
579*/ 577*/
580bool QDawg::contains(const QString& s) const 578bool QDawg::contains(const QString& s) const
581{ 579{
582 return d ? d->contains(s) : FALSE; 580 return d ? d->contains(s) : FALSE;
583} 581}
584 582
585/*! 583/*!
586 \internal 584 \internal
587 585
588 For debugging: prints out the DAWG contents. 586 For debugging: prints out the DAWG contents.
589*/ 587*/
590void QDawg::dump() const 588void QDawg::dump() const
591{ 589{
592 if ( d ) d->dump(); 590 if ( d ) d->dump();
593} 591}
594 592
595/*! 593/*!
596 \class QDawg::Node qdawg.h 594 \class QDawg::Node qdawg.h
597 \brief The QDawg::Node class represents one node of a QDawg. 595 \brief The QDawg::Node class represents one node of a QDawg.
598*/ 596*/
599 597
600/*! 598/*!
601 \fn QChar QDawg::Node::letter() const 599 \fn QChar QDawg::Node::letter() const
602 600
603 Returns this Node's letter. 601 Returns this Node's letter.
604*/ 602*/
605/*! 603/*!
606 \fn bool QDawg::Node::isWord() const 604 \fn bool QDawg::Node::isWord() const
607 605
608 Returns TRUE if this Node is the end of a word; otherwise returns 606 Returns TRUE if this Node is the end of a word; otherwise returns
609 FALSE. 607 FALSE.
610*/ 608*/
611/*! 609/*!
612 \fn bool QDawg::Node::isLast() const 610 \fn bool QDawg::Node::isLast() const
613 611
614 Returns TRUE if this Node is the last in the child list; otherwise 612 Returns TRUE if this Node is the last in the child list; otherwise
615 returns FALSE. 613 returns FALSE.
616*/ 614*/
617/*! 615/*!
618 \fn const Node* QDawg::Node::next() const 616 \fn const Node* QDawg::Node::next() const
619 617
620 Returns the next child Node in the child list or 0 if the current 618 Returns the next child Node in the child list or 0 if the current
621 Node isLast(). 619 Node isLast().
622*/ 620*/
623/*! 621/*!
624 \fn const Node* QDawg::Node::jump() const 622 \fn const Node* QDawg::Node::jump() const
625 623
626 Returns the node connected to this Node. 624 Returns the node connected to this Node.
627*/ 625*/