summaryrefslogtreecommitdiff
path: root/library/qdawg.cpp
authorkergoth <kergoth>2002-01-25 22:14:26 (UTC)
committer kergoth <kergoth>2002-01-25 22:14:26 (UTC)
commit15318cad33835e4e2dc620d033e43cd930676cdd (patch) (unidiff)
treec2fa0399a2c47fda8e2cd0092c73a809d17f68eb /library/qdawg.cpp
downloadopie-15318cad33835e4e2dc620d033e43cd930676cdd.zip
opie-15318cad33835e4e2dc620d033e43cd930676cdd.tar.gz
opie-15318cad33835e4e2dc620d033e43cd930676cdd.tar.bz2
Initial revision
Diffstat (limited to 'library/qdawg.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--library/qdawg.cpp510
1 files changed, 510 insertions, 0 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp
new file mode 100644
index 0000000..3615693
--- a/dev/null
+++ b/library/qdawg.cpp
@@ -0,0 +1,510 @@
1/**********************************************************************
2** Copyright (C) 2000 Trolltech AS. All rights reserved.
3**
4** This file is part of Qtopia Environment.
5**
6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file.
10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15**
16** Contact info@trolltech.com if any conditions of this licensing are
17** not clear to you.
18**
19**********************************************************************/
20#include "qdawg.h"
21#include <qintdict.h>
22#include <qvaluelist.h>
23#include <qtextstream.h>
24#include <qfile.h>
25#include <qtl.h>
26
27#include <limits.h>
28#include <stdio.h>
29
30// for mmap
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <sys/mman.h>
34#include <fcntl.h>
35#include <errno.h>
36#include <unistd.h>
37
38class QDawgPrivate;
39class QTrie;
40
41typedef QValueList<QTrie*> TrieClub;
42typedef QIntDict<TrieClub> TrieClubDirectory;
43
44class TriePtr {
45public:
46 QChar letter;
47 QTrie* p;
48 int operator <(const TriePtr& o) const;
49 int operator >(const TriePtr& o) const;
50 int operator <=(const TriePtr& o) const;
51};
52
53class TrieList : public QValueList<TriePtr> {
54 bool sorted;
55public:
56 TrieList()
57 {
58 sorted=TRUE;
59 }
60
61 QTrie* findAdd(QChar c);
62 bool equal(TrieList& l);
63
64 void sort()
65 {
66 if ( !sorted ) {
67 qHeapSort(*this);
68 sorted = TRUE;
69 }
70 }
71};
72
73// A fast but memory-wasting temporary class. The Dawg is the goal.
74class QTrie {
75public:
76 QTrie();
77 ~QTrie();
78
79 void insertWord(const QString& s, uint index=0);
80 bool equal(QTrie* o);
81 void dump(int indent=0);
82
83private:
84 TrieList children;
85 bool isword;
86
87 friend class QDawgPrivate;
88 int maxdepth;
89 int decendants;
90 int key;
91 void distributeKeys(TrieClubDirectory& directory);
92 QTrie* clubLeader(TrieClubDirectory& directory);
93 int collectKeys();
94 friend class TriePtr;
95 friend class TrieList;
96};
97
98QTrie::QTrie()
99{
100 key = 0;
101 isword = FALSE;
102}
103
104QTrie::~QTrie()
105{
106 // NOTE: we do not delete the children - after conversion to DAWG
107 // it's too difficult. The QTrie's are deleted via the directory.
108}
109
110void QTrie::insertWord(const QString& s, uint index)
111{
112 if ( index == s.length() ) {
113 isword = TRUE;
114 } else {
115 QTrie* t = children.findAdd(s[index]);
116 t->insertWord(s,index+1);
117 }
118}
119
120bool QTrie::equal(QTrie* o)
121{
122 if ( o == this ) return TRUE;
123 if ( isword != o->isword )
124 return FALSE;
125 return children.equal(o->children);
126}
127
128void QTrie::dump(int indent)
129{
130 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
131 QTrie* s = (*it).p;
132 for (int in=0; in<indent; in++)
133 fputc(' ',stderr);
134 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(),
135 s->key,s->isword?"word":"",s);
136 s->dump(indent+2);
137 }
138}
139
140void QTrie::distributeKeys(TrieClubDirectory& directory)
141{
142 maxdepth = INT_MIN;
143 decendants = children.count();
144 key = 0;
145 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
146 QTrie* s = (*it).p;
147 QChar l = (*it).letter;
148 s->distributeKeys(directory);
149 key = key*64+l.unicode()+s->key*5;
150 decendants += s->decendants;
151 if ( s->maxdepth+1 > maxdepth )
152 maxdepth = s->maxdepth+1;
153 }
154 if ( decendants ) {
155 key += decendants + maxdepth*256 + children.count() * 65536;
156 if ( !key ) key++; // unlikely
157 }
158 TrieClub* c = directory[key];
159 if ( !c ) directory.insert(key, (c = new TrieClub) );
160 c->prepend(this);
161}
162
163QTrie* QTrie::clubLeader(TrieClubDirectory& directory)
164{
165 if ( !key ) return directory[0]->first();
166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
167 QTrie* t= (*it).p->clubLeader(directory);
168 (*it).p = t;
169 }
170 TrieClub *club = directory[key];
171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
172 QTrie* o = *it;
173 if ( o->equal(this) )
174 return o;
175 }
176 return this;
177}
178
179int QTrie::collectKeys()
180{
181 int n=0;
182 if ( key ) key=0,n+=children.count();
183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it)
184 n += (*it).p->collectKeys();
185 return n;
186}
187
188int TriePtr::operator <(const TriePtr& o) const
189 { return letter < o.letter; }
190int TriePtr::operator >(const TriePtr& o) const
191 { return letter > o.letter; }
192int TriePtr::operator <=(const TriePtr& o) const
193 { return letter <= o.letter; }
194
195bool TrieList::equal(TrieList& l)
196{
197 if ( count() != l.count() )
198 return FALSE;
199 sort(); l.sort();
200 ConstIterator it2 = begin();
201 ConstIterator it = l.begin();
202 for( ; it != l.end(); ++it, ++it2 )
203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) )
204 return FALSE;
205 return TRUE;
206}
207QTrie* TrieList::findAdd(QChar c)
208{
209 for (Iterator it=begin(); it!=end(); ++it) {
210 if ( (*it).letter == c )
211 return (*it).p;
212 }
213 TriePtr p;
214 p.p = new QTrie;
215 p.letter = c;
216 prepend(p);
217 sorted=FALSE;
218 sort();
219 return p.p;
220}
221
222static const char* dawg_sig = "QDAWG100";
223
224class QDawgPrivate {
225public:
226 QDawgPrivate(QIODevice* dev)
227 {
228 QDataStream ds(dev);
229 char sig[8];
230 ds.readRawBytes(sig,8);
231 if ( !strncmp(dawg_sig,sig,8) ) {
232 uint n;
233 char* nn;
234 ds.readBytes(nn,n);
235
236 // #### endianness problem ignored.
237 node = (QDawg::Node*)nn;
238 nodes = n / sizeof(QDawg::Node);
239 } else {
240 node = 0;
241 }
242 }
243
244 bool ok() const { return node; }
245
246 QDawgPrivate(uchar* mem)
247 {
248 if ( !strncmp(dawg_sig,(char*)mem,8) ) {
249 mem += 8;
250
251 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3];
252 mem += 4;
253
254 // #### endianness problem ignored.
255 node = (QDawg::Node*)((char*)mem);
256 nodes = n / sizeof(QDawg::Node);
257 }
258 }
259
260 QDawgPrivate(QTrie* t) // destroys the QTrie.
261 {
262 TrieClubDirectory directory(9973);
263 t->distributeKeys(directory);
264 QTrie* l = t->clubLeader(directory);
265 ASSERT(l==t);
266 generateArray(t);
267
268 TrieClub *club;
269 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit)
270 {
271 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
272 delete *it;
273 }
274 delete club;
275 }
276 }
277
278 bool write(QIODevice* dev)
279 {
280 QDataStream ds(dev);
281 ds.writeRawBytes(dawg_sig,8);
282 // #### endianness problem ignored.
283 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes);
284 return dev->state() == IO_Ok;
285 }
286
287 void dumpWords(int nid=0, int index=0)
288 {
289 static char word[256]; // ick latin1
290 int i=0;
291 do {
292 QDawg::Node& n = node[nid+i];
293 word[index] = n.let;
294 if ( n.isword )
295 fprintf(stderr,"%.*s\n",index+1,word);
296 if ( n.offset ) dumpWords(n.offset+nid+i,index+1);
297 } while (!node[nid+i++].islast);
298 }
299
300 void dump(int nid=0, int indent=0)
301 {
302 int i=0;
303 do {
304 QDawg::Node& n = node[nid+i];
305 fprintf(stderr,"%d: ",nid+i);
306 for (int in=0; in<indent; in++)
307 fputc(' ',stderr);
308 fprintf(stderr," %c %d %d %d\n",n.let,
309 n.isword,n.islast,n.offset);
310 if ( n.offset ) dump(n.offset+nid+i,indent+2);
311 } while (!node[nid+i++].islast);
312 }
313
314 int countWords(int nid=0)
315 {
316 int t=0;
317 int i=0;
318 do {
319 QDawg::Node& n = node[nid+i];
320 if ( n.isword )
321 t++;
322 if ( n.offset )
323 t+=countWords(n.offset+nid+i);
324 } while (!node[nid+i++].islast);
325 return t;
326 }
327
328 bool contains(const QString& s, int nid=0, int index=0) const
329 {
330 int i=0;
331 do {
332 QDawg::Node& n = node[nid+i];
333 if ( s[index] == QChar((ushort)n.let) ) {
334 if ( n.isword && index == (int)s.length()-1 )
335 return TRUE;
336 if ( n.offset )
337 return contains(s,n.offset+nid+i,index+1);
338 }
339 } while (!node[nid+i++].islast);
340 return FALSE;
341 }
342
343 void appendAllWords(QStringList& list, int nid=0, QString s="") const
344 {
345 int i=0;
346 int next = s.length();
347 do {
348 QDawg::Node& n = node[nid+i];
349 s[next] = QChar((ushort)n.let);
350 if ( n.isword )
351 list.append(s);
352 if ( n.offset )
353 appendAllWords(list, n.offset+nid+i, s);
354 } while (!node[nid+i++].islast);
355 }
356
357 const QDawg::Node* root() { return node; }
358
359private:
360 void generateArray(QTrie* t)
361 {
362 nodes = 0;
363 int n = t->collectKeys();
364 node = new QDawg::Node[n];
365 appendToArray(t);
366 ASSERT(n == nodes);
367 }
368
369 int appendToArray(QTrie* t)
370 {
371 if ( !t->key ) {
372 if ( !t->children.count() )
373 return 0;
374 t->key = nodes;
375 nodes += t->children.count();
376 QDawg::Node* n = &node[t->key-1];
377 int here = t->key;
378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) {
379 QTrie* s = (*it).p;
380 ++n;
381 n->let = (*it).letter.unicode();
382 n->isword = s->isword;
383 n->islast = 0;
384 n->offset = appendToArray(s);
385 if ( n->offset ) {
386 int t = n->offset-here;
387 n->offset=t;
388 if ( n->offset != t )
389 qWarning("Overflow: too many words");
390 }
391 here++;
392 }
393 n->islast = 1;
394 }
395 return t->key;
396 }
397
398private:
399 int nodes;
400 QDawg::Node *node;
401};
402
403
404QDawg::QDawg()
405{
406 d = 0;
407}
408
409QDawg::~QDawg()
410{
411 delete d;
412}
413
414bool QDawg::createFromWords(QIODevice* dev)
415{
416 delete d;
417
418 QTextStream i(dev);
419 QTrie* trie = new QTrie;
420 int n=0;
421 while (!i.atEnd()) {
422 trie->insertWord(QString::fromUtf8(i.readLine()));
423 n++;
424 }
425 if ( n )
426 d = new QDawgPrivate(trie);
427 else
428 d = 0;
429 return TRUE;
430}
431
432void QDawg::createFromWords(const QStringList& list)
433{
434 delete d;
435
436 if ( list.count() ) {
437 QTrie* trie = new QTrie;
438 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) {
439 trie->insertWord(*it);
440 }
441 d = new QDawgPrivate(trie);
442 } else {
443 d = 0;
444 }
445}
446
447QStringList QDawg::allWords() const
448{
449 QStringList result;
450 if ( d ) d->appendAllWords(result);
451 return result;
452}
453
454
455bool QDawg::readFile(const QString& filename)
456{
457 delete d;
458 d = 0;
459 int f = ::open( QFile::encodeName(filename), O_RDONLY );
460 if ( f < 0 )
461 return FALSE;
462 struct stat st;
463 if ( !fstat( f, &st ) ) {
464 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file
465 PROT_READ, // read-only memory
466 MAP_FILE | MAP_PRIVATE, // swap-backed map from file
467 f, 0 ); // from offset 0 of f
468 if ( tmp && tmp != (char*)MAP_FAILED )
469 d = new QDawgPrivate((uchar*)tmp);
470 }
471 ::close( f );
472 return d;
473}
474
475bool QDawg::read(QIODevice* dev)
476{
477 delete d;
478 d = new QDawgPrivate(dev);
479 if ( d->ok() )
480 return TRUE;
481 delete d;
482 d = 0;
483 return FALSE;
484}
485
486bool QDawg::write(QIODevice* dev) const
487{
488 return d ? d->write(dev) : TRUE;
489}
490
491int QDawg::countWords() const
492{
493 return d ? d->countWords() : 0;
494}
495
496const QDawg::Node* QDawg::root() const
497{
498 return d ? d->root() : 0;
499}
500
501bool QDawg::contains(const QString& s) const
502{
503 return d ? d->contains(s) : FALSE;
504}
505
506void QDawg::dump() const
507{
508 if ( d ) d->dump();
509}
510