summaryrefslogtreecommitdiff
path: root/library/qdawg.cpp
Unidiff
Diffstat (limited to 'library/qdawg.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--library/qdawg.cpp123
1 files changed, 120 insertions, 3 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp
index 3615693..af5dc82 100644
--- a/library/qdawg.cpp
+++ b/library/qdawg.cpp
@@ -1,510 +1,627 @@
1/********************************************************************** 1/**********************************************************************
2** Copyright (C) 2000 Trolltech AS. All rights reserved. 2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
3** 3**
4** This file is part of Qtopia Environment. 4** This file is part of the Qtopia Environment.
5** 5**
6** This file may be distributed and/or modified under the terms of the 6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software 7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the 8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file. 9** packaging of this file.
10** 10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE 11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13** 13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information. 14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15** 15**
16** Contact info@trolltech.com if any conditions of this licensing are 16** Contact info@trolltech.com if any conditions of this licensing are
17** not clear to you. 17** not clear to you.
18** 18**
19**********************************************************************/ 19**********************************************************************/
20#include "qdawg.h" 20#include "qdawg.h"
21#include <qintdict.h> 21#include <qintdict.h>
22#include <qvaluelist.h> 22#include <qvaluelist.h>
23#include <qtextstream.h> 23#include <qtextstream.h>
24#include <qfile.h> 24#include <qfile.h>
25#include <qtl.h> 25#include <qtl.h>
26 26
27#include <limits.h> 27#include <limits.h>
28#include <stdio.h> 28#include <stdio.h>
29 29
30// for mmap 30// for mmap
31#include <sys/types.h> 31#include <sys/types.h>
32#include <sys/stat.h> 32#include <sys/stat.h>
33#include <sys/mman.h> 33#include <sys/mman.h>
34#include <fcntl.h> 34#include <fcntl.h>
35#include <errno.h> 35#include <errno.h>
36#include <unistd.h> 36#include <unistd.h>
37 37
38class QDawgPrivate; 38class QDawgPrivate;
39class QTrie; 39class QTrie;
40 40
41typedef QValueList<QTrie*> TrieClub; 41typedef QValueList<QTrie*> TrieClub;
42typedef QIntDict<TrieClub> TrieClubDirectory; 42typedef QIntDict<TrieClub> TrieClubDirectory;
43 43
44class TriePtr { 44class TriePtr {
45public: 45public:
46 QChar letter; 46 QChar letter;
47 QTrie* p; 47 QTrie* p;
48 int operator <(const TriePtr& o) const; 48 int operator <(const TriePtr& o) const;
49 int operator >(const TriePtr& o) const; 49 int operator >(const TriePtr& o) const;
50 int operator <=(const TriePtr& o) const; 50 int operator <=(const TriePtr& o) const;
51}; 51};
52 52
53class TrieList : public QValueList<TriePtr> { 53class TrieList : public QValueList<TriePtr> {
54 bool sorted; 54 bool sorted;
55public: 55public:
56 TrieList() 56 TrieList()
57 { 57 {
58 sorted=TRUE; 58 sorted=TRUE;
59 } 59 }
60 60
61 QTrie* findAdd(QChar c); 61 QTrie* findAdd(QChar c);
62 bool equal(TrieList& l); 62 bool equal(TrieList& l);
63 63
64 void sort() 64 void sort()
65 { 65 {
66 if ( !sorted ) { 66 if ( !sorted ) {
67 qHeapSort(*this); 67 qHeapSort(*this);
68 sorted = TRUE; 68 sorted = TRUE;
69 } 69 }
70 } 70 }
71}; 71};
72 72
73// A fast but memory-wasting temporary class. The Dawg is the goal. 73// A fast but memory-wasting temporary class. The Dawg is the goal.
74class QTrie { 74class QTrie {
75public: 75public:
76 QTrie(); 76 QTrie();
77 ~QTrie(); 77 ~QTrie();
78 78
79 void insertWord(const QString& s, uint index=0); 79 void insertWord(const QString& s, uint index=0);
80 bool equal(QTrie* o); 80 bool equal(QTrie* o);
81 void dump(int indent=0); 81 void dump(int indent=0);
82 82
83private: 83private:
84 TrieList children; 84 TrieList children;
85 bool isword; 85 bool isword;
86 86
87 friend class QDawgPrivate; 87 friend class QDawgPrivate;
88 int maxdepth; 88 int maxdepth;
89 int decendants; 89 int decendants;
90 int key; 90 int key;
91 void distributeKeys(TrieClubDirectory& directory); 91 void distributeKeys(TrieClubDirectory& directory);
92 QTrie* clubLeader(TrieClubDirectory& directory); 92 QTrie* clubLeader(TrieClubDirectory& directory);
93 int collectKeys(); 93 int collectKeys();
94 friend class TriePtr; 94 friend class TriePtr;
95 friend class TrieList; 95 friend class TrieList;
96}; 96};
97 97
98QTrie::QTrie() 98QTrie::QTrie()
99{ 99{
100 key = 0; 100 key = 0;
101 isword = FALSE; 101 isword = FALSE;
102} 102}
103 103
104QTrie::~QTrie() 104QTrie::~QTrie()
105{ 105{
106 // NOTE: we do not delete the children - after conversion to DAWG 106 // NOTE: we do not delete the children - after conversion to DAWG
107 // it's too difficult. The QTrie's are deleted via the directory. 107 // it's too difficult. The QTrie's are deleted via the directory.
108} 108}
109 109
110void QTrie::insertWord(const QString& s, uint index) 110void QTrie::insertWord(const QString& s, uint index)
111{ 111{
112 if ( index == s.length() ) { 112 if ( index == s.length() ) {
113 isword = TRUE; 113 isword = TRUE;
114 } else { 114 } else {
115 QTrie* t = children.findAdd(s[index]); 115 QTrie* t = children.findAdd(s[index]);
116 t->insertWord(s,index+1); 116 t->insertWord(s,index+1);
117 } 117 }
118} 118}
119 119
120bool QTrie::equal(QTrie* o) 120bool QTrie::equal(QTrie* o)
121{ 121{
122 if ( o == this ) return TRUE; 122 if ( o == this ) return TRUE;
123 if ( isword != o->isword ) 123 if ( isword != o->isword )
124 return FALSE; 124 return FALSE;
125 return children.equal(o->children); 125 return children.equal(o->children);
126} 126}
127 127
128void QTrie::dump(int indent) 128void QTrie::dump(int indent)
129{ 129{
130 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 130 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
131 QTrie* s = (*it).p; 131 QTrie* s = (*it).p;
132 for (int in=0; in<indent; in++) 132 for (int in=0; in<indent; in++)
133 fputc(' ',stderr); 133 fputc(' ',stderr);
134 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(), 134 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(),
135 s->key,s->isword?"word":"",s); 135 s->key,s->isword?"word":"",s);
136 s->dump(indent+2); 136 s->dump(indent+2);
137 } 137 }
138} 138}
139 139
140void QTrie::distributeKeys(TrieClubDirectory& directory) 140void QTrie::distributeKeys(TrieClubDirectory& directory)
141{ 141{
142 maxdepth = INT_MIN; 142 maxdepth = INT_MIN;
143 decendants = children.count(); 143 decendants = children.count();
144 key = 0; 144 key = 0;
145 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 145 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
146 QTrie* s = (*it).p; 146 QTrie* s = (*it).p;
147 QChar l = (*it).letter; 147 QChar l = (*it).letter;
148 s->distributeKeys(directory); 148 s->distributeKeys(directory);
149 key = key*64+l.unicode()+s->key*5; 149 key = key*64+l.unicode()+s->key*5;
150 decendants += s->decendants; 150 decendants += s->decendants;
151 if ( s->maxdepth+1 > maxdepth ) 151 if ( s->maxdepth+1 > maxdepth )
152 maxdepth = s->maxdepth+1; 152 maxdepth = s->maxdepth+1;
153 } 153 }
154 if ( decendants ) { 154 if ( decendants ) {
155 key += decendants + maxdepth*256 + children.count() * 65536; 155 key += decendants + maxdepth*256 + children.count() * 65536;
156 if ( !key ) key++; // unlikely 156 if ( !key ) key++; // unlikely
157 } 157 }
158 TrieClub* c = directory[key]; 158 TrieClub* c = directory[key];
159 if ( !c ) directory.insert(key, (c = new TrieClub) ); 159 if ( !c ) directory.insert(key, (c = new TrieClub) );
160 c->prepend(this); 160 c->prepend(this);
161} 161}
162 162
163QTrie* QTrie::clubLeader(TrieClubDirectory& directory) 163QTrie* QTrie::clubLeader(TrieClubDirectory& directory)
164{ 164{
165 if ( !key ) return directory[0]->first(); 165 if ( !key ) return directory[0]->first();
166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
167 QTrie* t= (*it).p->clubLeader(directory); 167 QTrie* t= (*it).p->clubLeader(directory);
168 (*it).p = t; 168 (*it).p = t;
169 } 169 }
170 TrieClub *club = directory[key]; 170 TrieClub *club = directory[key];
171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
172 QTrie* o = *it; 172 QTrie* o = *it;
173 if ( o->equal(this) ) 173 if ( o->equal(this) )
174 return o; 174 return o;
175 } 175 }
176 return this; 176 return this;
177} 177}
178 178
179int QTrie::collectKeys() 179int QTrie::collectKeys()
180{ 180{
181 int n=0; 181 int n=0;
182 if ( key ) key=0,n+=children.count(); 182 if ( key ) key=0,n+=children.count();
183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) 183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it)
184 n += (*it).p->collectKeys(); 184 n += (*it).p->collectKeys();
185 return n; 185 return n;
186} 186}
187 187
188int TriePtr::operator <(const TriePtr& o) const 188int TriePtr::operator <(const TriePtr& o) const
189 { return letter < o.letter; } 189 { return letter < o.letter; }
190int TriePtr::operator >(const TriePtr& o) const 190int TriePtr::operator >(const TriePtr& o) const
191 { return letter > o.letter; } 191 { return letter > o.letter; }
192int TriePtr::operator <=(const TriePtr& o) const 192int TriePtr::operator <=(const TriePtr& o) const
193 { return letter <= o.letter; } 193 { return letter <= o.letter; }
194 194
195bool TrieList::equal(TrieList& l) 195bool TrieList::equal(TrieList& l)
196{ 196{
197 if ( count() != l.count() ) 197 if ( count() != l.count() )
198 return FALSE; 198 return FALSE;
199 sort(); l.sort(); 199 sort(); l.sort();
200 ConstIterator it2 = begin(); 200 ConstIterator it2 = begin();
201 ConstIterator it = l.begin(); 201 ConstIterator it = l.begin();
202 for( ; it != l.end(); ++it, ++it2 ) 202 for( ; it != l.end(); ++it, ++it2 )
203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) 203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) )
204 return FALSE; 204 return FALSE;
205 return TRUE; 205 return TRUE;
206} 206}
207QTrie* TrieList::findAdd(QChar c) 207QTrie* TrieList::findAdd(QChar c)
208{ 208{
209 for (Iterator it=begin(); it!=end(); ++it) { 209 for (Iterator it=begin(); it!=end(); ++it) {
210 if ( (*it).letter == c ) 210 if ( (*it).letter == c )
211 return (*it).p; 211 return (*it).p;
212 } 212 }
213 TriePtr p; 213 TriePtr p;
214 p.p = new QTrie; 214 p.p = new QTrie;
215 p.letter = c; 215 p.letter = c;
216 prepend(p); 216 prepend(p);
217 sorted=FALSE; 217 sorted=FALSE;
218 sort(); 218 sort();
219 return p.p; 219 return p.p;
220} 220}
221 221
222static const char* dawg_sig = "QDAWG100"; 222static const char* dawg_sig = "QDAWG100";
223 223
224class QDawgPrivate { 224class QDawgPrivate {
225public: 225public:
226 QDawgPrivate(QIODevice* dev) 226 QDawgPrivate(QIODevice* dev)
227 { 227 {
228 QDataStream ds(dev); 228 QDataStream ds(dev);
229 char sig[8]; 229 char sig[8];
230 ds.readRawBytes(sig,8); 230 ds.readRawBytes(sig,8);
231 if ( !strncmp(dawg_sig,sig,8) ) { 231 if ( !strncmp(dawg_sig,sig,8) ) {
232 uint n; 232 uint n;
233 char* nn; 233 char* nn;
234 ds.readBytes(nn,n); 234 ds.readBytes(nn,n);
235 235
236 // #### endianness problem ignored. 236 // #### endianness problem ignored.
237 node = (QDawg::Node*)nn; 237 node = (QDawg::Node*)nn;
238 nodes = n / sizeof(QDawg::Node); 238 nodes = n / sizeof(QDawg::Node);
239 } else { 239 } else {
240 node = 0; 240 node = 0;
241 } 241 }
242 } 242 }
243 243
244 bool ok() const { return node; } 244 bool ok() const { return node; }
245 245
246 QDawgPrivate(uchar* mem) 246 QDawgPrivate(uchar* mem)
247 { 247 {
248 if ( !strncmp(dawg_sig,(char*)mem,8) ) { 248 if ( !strncmp(dawg_sig,(char*)mem,8) ) {
249 mem += 8; 249 mem += 8;
250 250
251 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3]; 251 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3];
252 mem += 4; 252 mem += 4;
253 253
254 // #### endianness problem ignored. 254 // #### endianness problem ignored.
255 node = (QDawg::Node*)((char*)mem); 255 node = (QDawg::Node*)((char*)mem);
256 nodes = n / sizeof(QDawg::Node); 256 nodes = n / sizeof(QDawg::Node);
257 } 257 }
258 } 258 }
259 259
260 QDawgPrivate(QTrie* t) // destroys the QTrie. 260 QDawgPrivate(QTrie* t) // destroys the QTrie.
261 { 261 {
262 TrieClubDirectory directory(9973); 262 TrieClubDirectory directory(9973);
263 t->distributeKeys(directory); 263 t->distributeKeys(directory);
264 QTrie* l = t->clubLeader(directory); 264 QTrie* l = t->clubLeader(directory);
265 ASSERT(l==t); 265 ASSERT(l==t);
266 generateArray(t); 266 generateArray(t);
267 267
268 TrieClub *club; 268 TrieClub *club;
269 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit) 269 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit)
270 { 270 {
271 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 271 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
272 delete *it; 272 delete *it;
273 } 273 }
274 delete club; 274 delete club;
275 } 275 }
276 } 276 }
277 277
278 bool write(QIODevice* dev) 278 bool write(QIODevice* dev)
279 { 279 {
280 QDataStream ds(dev); 280 QDataStream ds(dev);
281 ds.writeRawBytes(dawg_sig,8); 281 ds.writeRawBytes(dawg_sig,8);
282 // #### endianness problem ignored. 282 // #### endianness problem ignored.
283 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes); 283 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes);
284 return dev->state() == IO_Ok; 284 return dev->state() == IO_Ok;
285 } 285 }
286 286
287 void dumpWords(int nid=0, int index=0) 287 void dumpWords(int nid=0, int index=0)
288 { 288 {
289 static char word[256]; // ick latin1 289 static char word[256]; // ick latin1
290 int i=0; 290 int i=0;
291 do { 291 do {
292 QDawg::Node& n = node[nid+i]; 292 QDawg::Node& n = node[nid+i];
293 word[index] = n.let; 293 word[index] = n.let;
294 if ( n.isword ) 294 if ( n.isword )
295 fprintf(stderr,"%.*s\n",index+1,word); 295 fprintf(stderr,"%.*s\n",index+1,word);
296 if ( n.offset ) dumpWords(n.offset+nid+i,index+1); 296 if ( n.offset ) dumpWords(n.offset+nid+i,index+1);
297 } while (!node[nid+i++].islast); 297 } while (!node[nid+i++].islast);
298 } 298 }
299 299
300 void dump(int nid=0, int indent=0) 300 void dump(int nid=0, int indent=0)
301 { 301 {
302 int i=0; 302 int i=0;
303 do { 303 do {
304 QDawg::Node& n = node[nid+i]; 304 QDawg::Node& n = node[nid+i];
305 fprintf(stderr,"%d: ",nid+i); 305 fprintf(stderr,"%d: ",nid+i);
306 for (int in=0; in<indent; in++) 306 for (int in=0; in<indent; in++)
307 fputc(' ',stderr); 307 fputc(' ',stderr);
308 fprintf(stderr," %c %d %d %d\n",n.let, 308 fprintf(stderr," %c %d %d %d\n",n.let,
309 n.isword,n.islast,n.offset); 309 n.isword,n.islast,n.offset);
310 if ( n.offset ) dump(n.offset+nid+i,indent+2); 310 if ( n.offset ) dump(n.offset+nid+i,indent+2);
311 } while (!node[nid+i++].islast); 311 } while (!node[nid+i++].islast);
312 } 312 }
313 313
314 int countWords(int nid=0) 314 int countWords(int nid=0)
315 { 315 {
316 int t=0; 316 int t=0;
317 int i=0; 317 int i=0;
318 do { 318 do {
319 QDawg::Node& n = node[nid+i]; 319 QDawg::Node& n = node[nid+i];
320 if ( n.isword ) 320 if ( n.isword )
321 t++; 321 t++;
322 if ( n.offset ) 322 if ( n.offset )
323 t+=countWords(n.offset+nid+i); 323 t+=countWords(n.offset+nid+i);
324 } while (!node[nid+i++].islast); 324 } while (!node[nid+i++].islast);
325 return t; 325 return t;
326 } 326 }
327 327
328 bool contains(const QString& s, int nid=0, int index=0) const 328 bool contains(const QString& s, int nid=0, int index=0) const
329 { 329 {
330 int i=0; 330 int i=0;
331 do { 331 do {
332 QDawg::Node& n = node[nid+i]; 332 QDawg::Node& n = node[nid+i];
333 if ( s[index] == QChar((ushort)n.let) ) { 333 if ( s[index] == QChar((ushort)n.let) ) {
334 if ( n.isword && index == (int)s.length()-1 ) 334 if ( n.isword && index == (int)s.length()-1 )
335 return TRUE; 335 return TRUE;
336 if ( n.offset ) 336 if ( n.offset )
337 return contains(s,n.offset+nid+i,index+1); 337 return contains(s,n.offset+nid+i,index+1);
338 } 338 }
339 } while (!node[nid+i++].islast); 339 } while (!node[nid+i++].islast);
340 return FALSE; 340 return FALSE;
341 } 341 }
342 342
343 void appendAllWords(QStringList& list, int nid=0, QString s="") const 343 void appendAllWords(QStringList& list, int nid=0, QString s="") const
344 { 344 {
345 int i=0; 345 int i=0;
346 int next = s.length(); 346 int next = s.length();
347 do { 347 do {
348 QDawg::Node& n = node[nid+i]; 348 QDawg::Node& n = node[nid+i];
349 s[next] = QChar((ushort)n.let); 349 s[next] = QChar((ushort)n.let);
350 if ( n.isword ) 350 if ( n.isword )
351 list.append(s); 351 list.append(s);
352 if ( n.offset ) 352 if ( n.offset )
353 appendAllWords(list, n.offset+nid+i, s); 353 appendAllWords(list, n.offset+nid+i, s);
354 } while (!node[nid+i++].islast); 354 } while (!node[nid+i++].islast);
355 } 355 }
356 356
357 const QDawg::Node* root() { return node; } 357 const QDawg::Node* root() { return node; }
358 358
359private: 359private:
360 void generateArray(QTrie* t) 360 void generateArray(QTrie* t)
361 { 361 {
362 nodes = 0; 362 nodes = 0;
363 int n = t->collectKeys(); 363 int n = t->collectKeys();
364 node = new QDawg::Node[n]; 364 node = new QDawg::Node[n];
365 appendToArray(t); 365 appendToArray(t);
366 ASSERT(n == nodes); 366 ASSERT(n == nodes);
367 } 367 }
368 368
369 int appendToArray(QTrie* t) 369 int appendToArray(QTrie* t)
370 { 370 {
371 if ( !t->key ) { 371 if ( !t->key ) {
372 if ( !t->children.count() ) 372 if ( !t->children.count() )
373 return 0; 373 return 0;
374 t->key = nodes; 374 t->key = nodes;
375 nodes += t->children.count(); 375 nodes += t->children.count();
376 QDawg::Node* n = &node[t->key-1]; 376 QDawg::Node* n = &node[t->key-1];
377 int here = t->key; 377 int here = t->key;
378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { 378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) {
379 QTrie* s = (*it).p; 379 QTrie* s = (*it).p;
380 ++n; 380 ++n;
381 n->let = (*it).letter.unicode(); 381 n->let = (*it).letter.unicode();
382 n->isword = s->isword; 382 n->isword = s->isword;
383 n->islast = 0; 383 n->islast = 0;
384 n->offset = appendToArray(s); 384 n->offset = appendToArray(s);
385 if ( n->offset ) { 385 if ( n->offset ) {
386 int t = n->offset-here; 386 int t = n->offset-here;
387 n->offset=t; 387 n->offset=t;
388 if ( n->offset != t ) 388 if ( n->offset != t )
389 qWarning("Overflow: too many words"); 389 qWarning("Overflow: too many words");
390 } 390 }
391 here++; 391 here++;
392 } 392 }
393 n->islast = 1; 393 n->islast = 1;
394 } 394 }
395 return t->key; 395 return t->key;
396 } 396 }
397 397
398private: 398private:
399 int nodes; 399 int nodes;
400 QDawg::Node *node; 400 QDawg::Node *node;
401}; 401};
402 402
403/*!
404 \class QDawg qdawg.h
405 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph.
403 406
407 A DAWG provides very fast look-up of words in a word list.
408
409 The word list is created using readFile(), read() or
410 createFromWords(). A list of all the DAWG's words is returned by
411 allWords(), and the total number of words is returned by
412 countWords(). Use contains() to see if a particular word is in the
413 DAWG. The root \link qdawg-node.html node\endlink is returned by root().
414
415 A global DAWG is maintained for the current locale. See the
416 \l Global class for details.
417
418 The structure of a DAWG is a graph of \link qdawg-node.html
419 Nodes\endlink. There are no cycles in the graph (since there are no
420 inifinitely repeating words). Each \link qdawg-node.html
421 Node\endlink is a member of a list of \link qdawg-node.html
422 Nodes\endlink called a child list. Each \link qdawg-node.html
423 Node\endlink in the child list has a \e letter, an \e isWord flag,
424 at most one \e jump arc, and at most one arc to the next child in
425 the list.
426
427 If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG,
428 starting from the root(), and you concatenate all the letters from
429 the single child in each child list that you visit, at every \link
430 qdawg-node.html Node\endlink which has the isWord flag set your
431 concatenation will be a word in the list represented by the DAWG.
432
433 For example, the DAWG below represents the word list:
434 ban, band, can, cane, cans, pan, pane, pans.
435
436 This structuring not only provides O(1) lookup of words in the word list,
437 but also produces a smaller storage file than a plain text file word list.
438
439 \img qdawg.png
440*/
441
442/*!
443 Constructs a new empty DAWG.
444*/
404QDawg::QDawg() 445QDawg::QDawg()
405{ 446{
406 d = 0; 447 d = 0;
407} 448}
408 449
450/*!
451 Deletes the DAWG.
452*/
409QDawg::~QDawg() 453QDawg::~QDawg()
410{ 454{
411 delete d; 455 delete d;
412} 456}
413 457
458/*!
459 \overload
460 Replaces all the DAWG's words with words read from \a dev.
461*/
414bool QDawg::createFromWords(QIODevice* dev) 462bool QDawg::createFromWords(QIODevice* dev)
415{ 463{
416 delete d; 464 delete d;
417 465
418 QTextStream i(dev); 466 QTextStream i(dev);
419 QTrie* trie = new QTrie; 467 QTrie* trie = new QTrie;
420 int n=0; 468 int n=0;
421 while (!i.atEnd()) { 469 while (!i.atEnd()) {
422 trie->insertWord(QString::fromUtf8(i.readLine())); 470 trie->insertWord(QString::fromUtf8(i.readLine()));
423 n++; 471 n++;
424 } 472 }
425 if ( n ) 473 if ( n )
426 d = new QDawgPrivate(trie); 474 d = new QDawgPrivate(trie);
427 else 475 else
428 d = 0; 476 d = 0;
429 return TRUE; 477 return TRUE;
430} 478}
431 479
480/*!
481 Replaces all the DAWG's words with the words in the \a list.
482*/
432void QDawg::createFromWords(const QStringList& list) 483void QDawg::createFromWords(const QStringList& list)
433{ 484{
434 delete d; 485 delete d;
435 486
436 if ( list.count() ) { 487 if ( list.count() ) {
437 QTrie* trie = new QTrie; 488 QTrie* trie = new QTrie;
438 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { 489 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) {
439 trie->insertWord(*it); 490 trie->insertWord(*it);
440 } 491 }
441 d = new QDawgPrivate(trie); 492 d = new QDawgPrivate(trie);
442 } else { 493 } else {
443 d = 0; 494 d = 0;
444 } 495 }
445} 496}
446 497
498/*!
499 Returns a list of all the words in the DAWG.
500*/
447QStringList QDawg::allWords() const 501QStringList QDawg::allWords() const
448{ 502{
449 QStringList result; 503 QStringList result;
450 if ( d ) d->appendAllWords(result); 504 if ( d ) d->appendAllWords(result);
451 return result; 505 return result;
452} 506}
453 507
454 508
509/*!
510 Replaces the DAWG with the DAWG in \a filename.
511 The file is memory-mapped.
512
513 \sa write()
514*/
455bool QDawg::readFile(const QString& filename) 515bool QDawg::readFile(const QString& filename)
456{ 516{
457 delete d; 517 delete d;
458 d = 0; 518 d = 0;
459 int f = ::open( QFile::encodeName(filename), O_RDONLY ); 519 int f = ::open( QFile::encodeName(filename), O_RDONLY );
460 if ( f < 0 ) 520 if ( f < 0 )
461 return FALSE; 521 return FALSE;
462 struct stat st; 522 struct stat st;
463 if ( !fstat( f, &st ) ) { 523 if ( !fstat( f, &st ) ) {
464 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file 524 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file
465 PROT_READ, // read-only memory 525 PROT_READ, // read-only memory
466 MAP_FILE | MAP_PRIVATE, // swap-backed map from file 526 MAP_FILE | MAP_PRIVATE, // swap-backed map from file
467 f, 0 ); // from offset 0 of f 527 f, 0 ); // from offset 0 of f
468 if ( tmp && tmp != (char*)MAP_FAILED ) 528 if ( tmp && tmp != (char*)MAP_FAILED )
469 d = new QDawgPrivate((uchar*)tmp); 529 d = new QDawgPrivate((uchar*)tmp);
470 } 530 }
471 ::close( f ); 531 ::close( f );
472 return d; 532 return d;
473} 533}
474 534
535/*!
536 Replaces the DAWG with the DAWG in \a dev.
537 The file is memory-mapped.
538
539 \sa write()
540*/
475bool QDawg::read(QIODevice* dev) 541bool QDawg::read(QIODevice* dev)
476{ 542{
477 delete d; 543 delete d;
478 d = new QDawgPrivate(dev); 544 d = new QDawgPrivate(dev);
479 if ( d->ok() ) 545 if ( d->ok() )
480 return TRUE; 546 return TRUE;
481 delete d; 547 delete d;
482 d = 0; 548 d = 0;
483 return FALSE; 549 return FALSE;
484} 550}
485 551
552/*!
553 Writes the DAWG to \a dev, in a custom QDAWG format.
554*/
486bool QDawg::write(QIODevice* dev) const 555bool QDawg::write(QIODevice* dev) const
487{ 556{
488 return d ? d->write(dev) : TRUE; 557 return d ? d->write(dev) : TRUE;
489} 558}
490 559
560/*!
561 Returns the number of words in the DAWG.
562*/
491int QDawg::countWords() const 563int QDawg::countWords() const
492{ 564{
493 return d ? d->countWords() : 0; 565 return d ? d->countWords() : 0;
494} 566}
495 567
568/*!
569 Returns the root \link qdawg-node.html Node\endlink of the DAWG.
570*/
496const QDawg::Node* QDawg::root() const 571const QDawg::Node* QDawg::root() const
497{ 572{
498 return d ? d->root() : 0; 573 return d ? d->root() : 0;
499} 574}
500 575
576/*!
577 Returns TRUE if the DAWG contains the word \a s; otherwise returns
578 FALSE.
579*/
501bool QDawg::contains(const QString& s) const 580bool QDawg::contains(const QString& s) const
502{ 581{
503 return d ? d->contains(s) : FALSE; 582 return d ? d->contains(s) : FALSE;
504} 583}
505 584
585/*!
586 \internal
587
588 For debugging: prints out the DAWG contents.
589*/
506void QDawg::dump() const 590void QDawg::dump() const
507{ 591{
508 if ( d ) d->dump(); 592 if ( d ) d->dump();
509} 593}
510 594
595/*!
596 \class QDawg::Node qdawg.h
597 \brief The QDawg::Node class represents one node of a QDawg.
598*/
599
600/*!
601 \fn QChar QDawg::Node::letter() const
602
603 Returns this Node's letter.
604*/
605/*!
606 \fn bool QDawg::Node::isWord() const
607
608 Returns TRUE if this Node is the end of a word; otherwise returns
609 FALSE.
610*/
611/*!
612 \fn bool QDawg::Node::isLast() const
613
614 Returns TRUE if this Node is the last in the child list; otherwise
615 returns FALSE.
616*/
617/*!
618 \fn const Node* QDawg::Node::next() const
619
620 Returns the next child Node in the child list or 0 if the current
621 Node isLast().
622*/
623/*!
624 \fn const Node* QDawg::Node::jump() const
625
626 Returns the node connected to this Node.
627*/