summaryrefslogtreecommitdiff
path: root/library/qdawg.cpp
Unidiff
Diffstat (limited to 'library/qdawg.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--library/qdawg.cpp123
1 files changed, 120 insertions, 3 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp
index 3615693..af5dc82 100644
--- a/library/qdawg.cpp
+++ b/library/qdawg.cpp
@@ -1,52 +1,52 @@
1/********************************************************************** 1/**********************************************************************
2** Copyright (C) 2000 Trolltech AS. All rights reserved. 2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
3** 3**
4** This file is part of Qtopia Environment. 4** This file is part of the Qtopia Environment.
5** 5**
6** This file may be distributed and/or modified under the terms of the 6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software 7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the 8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file. 9** packaging of this file.
10** 10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE 11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13** 13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information. 14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15** 15**
16** Contact info@trolltech.com if any conditions of this licensing are 16** Contact info@trolltech.com if any conditions of this licensing are
17** not clear to you. 17** not clear to you.
18** 18**
19**********************************************************************/ 19**********************************************************************/
20#include "qdawg.h" 20#include "qdawg.h"
21#include <qintdict.h> 21#include <qintdict.h>
22#include <qvaluelist.h> 22#include <qvaluelist.h>
23#include <qtextstream.h> 23#include <qtextstream.h>
24#include <qfile.h> 24#include <qfile.h>
25#include <qtl.h> 25#include <qtl.h>
26 26
27#include <limits.h> 27#include <limits.h>
28#include <stdio.h> 28#include <stdio.h>
29 29
30// for mmap 30// for mmap
31#include <sys/types.h> 31#include <sys/types.h>
32#include <sys/stat.h> 32#include <sys/stat.h>
33#include <sys/mman.h> 33#include <sys/mman.h>
34#include <fcntl.h> 34#include <fcntl.h>
35#include <errno.h> 35#include <errno.h>
36#include <unistd.h> 36#include <unistd.h>
37 37
38class QDawgPrivate; 38class QDawgPrivate;
39class QTrie; 39class QTrie;
40 40
41typedef QValueList<QTrie*> TrieClub; 41typedef QValueList<QTrie*> TrieClub;
42typedef QIntDict<TrieClub> TrieClubDirectory; 42typedef QIntDict<TrieClub> TrieClubDirectory;
43 43
44class TriePtr { 44class TriePtr {
45public: 45public:
46 QChar letter; 46 QChar letter;
47 QTrie* p; 47 QTrie* p;
48 int operator <(const TriePtr& o) const; 48 int operator <(const TriePtr& o) const;
49 int operator >(const TriePtr& o) const; 49 int operator >(const TriePtr& o) const;
50 int operator <=(const TriePtr& o) const; 50 int operator <=(const TriePtr& o) const;
51}; 51};
52 52
@@ -151,97 +151,97 @@ void QTrie::distributeKeys(TrieClubDirectory& directory)
151 if ( s->maxdepth+1 > maxdepth ) 151 if ( s->maxdepth+1 > maxdepth )
152 maxdepth = s->maxdepth+1; 152 maxdepth = s->maxdepth+1;
153 } 153 }
154 if ( decendants ) { 154 if ( decendants ) {
155 key += decendants + maxdepth*256 + children.count() * 65536; 155 key += decendants + maxdepth*256 + children.count() * 65536;
156 if ( !key ) key++; // unlikely 156 if ( !key ) key++; // unlikely
157 } 157 }
158 TrieClub* c = directory[key]; 158 TrieClub* c = directory[key];
159 if ( !c ) directory.insert(key, (c = new TrieClub) ); 159 if ( !c ) directory.insert(key, (c = new TrieClub) );
160 c->prepend(this); 160 c->prepend(this);
161} 161}
162 162
163QTrie* QTrie::clubLeader(TrieClubDirectory& directory) 163QTrie* QTrie::clubLeader(TrieClubDirectory& directory)
164{ 164{
165 if ( !key ) return directory[0]->first(); 165 if ( !key ) return directory[0]->first();
166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
167 QTrie* t= (*it).p->clubLeader(directory); 167 QTrie* t= (*it).p->clubLeader(directory);
168 (*it).p = t; 168 (*it).p = t;
169 } 169 }
170 TrieClub *club = directory[key]; 170 TrieClub *club = directory[key];
171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
172 QTrie* o = *it; 172 QTrie* o = *it;
173 if ( o->equal(this) ) 173 if ( o->equal(this) )
174 return o; 174 return o;
175 } 175 }
176 return this; 176 return this;
177} 177}
178 178
179int QTrie::collectKeys() 179int QTrie::collectKeys()
180{ 180{
181 int n=0; 181 int n=0;
182 if ( key ) key=0,n+=children.count(); 182 if ( key ) key=0,n+=children.count();
183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) 183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it)
184 n += (*it).p->collectKeys(); 184 n += (*it).p->collectKeys();
185 return n; 185 return n;
186} 186}
187 187
188int TriePtr::operator <(const TriePtr& o) const 188int TriePtr::operator <(const TriePtr& o) const
189 { return letter < o.letter; } 189 { return letter < o.letter; }
190int TriePtr::operator >(const TriePtr& o) const 190int TriePtr::operator >(const TriePtr& o) const
191 { return letter > o.letter; } 191 { return letter > o.letter; }
192int TriePtr::operator <=(const TriePtr& o) const 192int TriePtr::operator <=(const TriePtr& o) const
193 { return letter <= o.letter; } 193 { return letter <= o.letter; }
194 194
195bool TrieList::equal(TrieList& l) 195bool TrieList::equal(TrieList& l)
196{ 196{
197 if ( count() != l.count() ) 197 if ( count() != l.count() )
198 return FALSE; 198 return FALSE;
199 sort(); l.sort(); 199 sort(); l.sort();
200 ConstIterator it2 = begin(); 200 ConstIterator it2 = begin();
201 ConstIterator it = l.begin(); 201 ConstIterator it = l.begin();
202 for( ; it != l.end(); ++it, ++it2 ) 202 for( ; it != l.end(); ++it, ++it2 )
203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) 203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) )
204 return FALSE; 204 return FALSE;
205 return TRUE; 205 return TRUE;
206} 206}
207QTrie* TrieList::findAdd(QChar c) 207QTrie* TrieList::findAdd(QChar c)
208{ 208{
209 for (Iterator it=begin(); it!=end(); ++it) { 209 for (Iterator it=begin(); it!=end(); ++it) {
210 if ( (*it).letter == c ) 210 if ( (*it).letter == c )
211 return (*it).p; 211 return (*it).p;
212 } 212 }
213 TriePtr p; 213 TriePtr p;
214 p.p = new QTrie; 214 p.p = new QTrie;
215 p.letter = c; 215 p.letter = c;
216 prepend(p); 216 prepend(p);
217 sorted=FALSE; 217 sorted=FALSE;
218 sort(); 218 sort();
219 return p.p; 219 return p.p;
220} 220}
221 221
222static const char* dawg_sig = "QDAWG100"; 222static const char* dawg_sig = "QDAWG100";
223 223
224class QDawgPrivate { 224class QDawgPrivate {
225public: 225public:
226 QDawgPrivate(QIODevice* dev) 226 QDawgPrivate(QIODevice* dev)
227 { 227 {
228 QDataStream ds(dev); 228 QDataStream ds(dev);
229 char sig[8]; 229 char sig[8];
230 ds.readRawBytes(sig,8); 230 ds.readRawBytes(sig,8);
231 if ( !strncmp(dawg_sig,sig,8) ) { 231 if ( !strncmp(dawg_sig,sig,8) ) {
232 uint n; 232 uint n;
233 char* nn; 233 char* nn;
234 ds.readBytes(nn,n); 234 ds.readBytes(nn,n);
235 235
236 // #### endianness problem ignored. 236 // #### endianness problem ignored.
237 node = (QDawg::Node*)nn; 237 node = (QDawg::Node*)nn;
238 nodes = n / sizeof(QDawg::Node); 238 nodes = n / sizeof(QDawg::Node);
239 } else { 239 } else {
240 node = 0; 240 node = 0;
241 } 241 }
242 } 242 }
243 243
244 bool ok() const { return node; } 244 bool ok() const { return node; }
245 245
246 QDawgPrivate(uchar* mem) 246 QDawgPrivate(uchar* mem)
247 { 247 {
@@ -355,156 +355,273 @@ public:
355 } 355 }
356 356
357 const QDawg::Node* root() { return node; } 357 const QDawg::Node* root() { return node; }
358 358
359private: 359private:
360 void generateArray(QTrie* t) 360 void generateArray(QTrie* t)
361 { 361 {
362 nodes = 0; 362 nodes = 0;
363 int n = t->collectKeys(); 363 int n = t->collectKeys();
364 node = new QDawg::Node[n]; 364 node = new QDawg::Node[n];
365 appendToArray(t); 365 appendToArray(t);
366 ASSERT(n == nodes); 366 ASSERT(n == nodes);
367 } 367 }
368 368
369 int appendToArray(QTrie* t) 369 int appendToArray(QTrie* t)
370 { 370 {
371 if ( !t->key ) { 371 if ( !t->key ) {
372 if ( !t->children.count() ) 372 if ( !t->children.count() )
373 return 0; 373 return 0;
374 t->key = nodes; 374 t->key = nodes;
375 nodes += t->children.count(); 375 nodes += t->children.count();
376 QDawg::Node* n = &node[t->key-1]; 376 QDawg::Node* n = &node[t->key-1];
377 int here = t->key; 377 int here = t->key;
378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { 378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) {
379 QTrie* s = (*it).p; 379 QTrie* s = (*it).p;
380 ++n; 380 ++n;
381 n->let = (*it).letter.unicode(); 381 n->let = (*it).letter.unicode();
382 n->isword = s->isword; 382 n->isword = s->isword;
383 n->islast = 0; 383 n->islast = 0;
384 n->offset = appendToArray(s); 384 n->offset = appendToArray(s);
385 if ( n->offset ) { 385 if ( n->offset ) {
386 int t = n->offset-here; 386 int t = n->offset-here;
387 n->offset=t; 387 n->offset=t;
388 if ( n->offset != t ) 388 if ( n->offset != t )
389 qWarning("Overflow: too many words"); 389 qWarning("Overflow: too many words");
390 } 390 }
391 here++; 391 here++;
392 } 392 }
393 n->islast = 1; 393 n->islast = 1;
394 } 394 }
395 return t->key; 395 return t->key;
396 } 396 }
397 397
398private: 398private:
399 int nodes; 399 int nodes;
400 QDawg::Node *node; 400 QDawg::Node *node;
401}; 401};
402 402
403/*!
404 \class QDawg qdawg.h
405 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph.
403 406
407 A DAWG provides very fast look-up of words in a word list.
408
409 The word list is created using readFile(), read() or
410 createFromWords(). A list of all the DAWG's words is returned by
411 allWords(), and the total number of words is returned by
412 countWords(). Use contains() to see if a particular word is in the
413 DAWG. The root \link qdawg-node.html node\endlink is returned by root().
414
415 A global DAWG is maintained for the current locale. See the
416 \l Global class for details.
417
418 The structure of a DAWG is a graph of \link qdawg-node.html
419 Nodes\endlink. There are no cycles in the graph (since there are no
420 inifinitely repeating words). Each \link qdawg-node.html
421 Node\endlink is a member of a list of \link qdawg-node.html
422 Nodes\endlink called a child list. Each \link qdawg-node.html
423 Node\endlink in the child list has a \e letter, an \e isWord flag,
424 at most one \e jump arc, and at most one arc to the next child in
425 the list.
426
427 If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG,
428 starting from the root(), and you concatenate all the letters from
429 the single child in each child list that you visit, at every \link
430 qdawg-node.html Node\endlink which has the isWord flag set your
431 concatenation will be a word in the list represented by the DAWG.
432
433 For example, the DAWG below represents the word list:
434 ban, band, can, cane, cans, pan, pane, pans.
435
436 This structuring not only provides O(1) lookup of words in the word list,
437 but also produces a smaller storage file than a plain text file word list.
438
439 \img qdawg.png
440*/
441
442/*!
443 Constructs a new empty DAWG.
444*/
404QDawg::QDawg() 445QDawg::QDawg()
405{ 446{
406 d = 0; 447 d = 0;
407} 448}
408 449
450/*!
451 Deletes the DAWG.
452*/
409QDawg::~QDawg() 453QDawg::~QDawg()
410{ 454{
411 delete d; 455 delete d;
412} 456}
413 457
458/*!
459 \overload
460 Replaces all the DAWG's words with words read from \a dev.
461*/
414bool QDawg::createFromWords(QIODevice* dev) 462bool QDawg::createFromWords(QIODevice* dev)
415{ 463{
416 delete d; 464 delete d;
417 465
418 QTextStream i(dev); 466 QTextStream i(dev);
419 QTrie* trie = new QTrie; 467 QTrie* trie = new QTrie;
420 int n=0; 468 int n=0;
421 while (!i.atEnd()) { 469 while (!i.atEnd()) {
422 trie->insertWord(QString::fromUtf8(i.readLine())); 470 trie->insertWord(QString::fromUtf8(i.readLine()));
423 n++; 471 n++;
424 } 472 }
425 if ( n ) 473 if ( n )
426 d = new QDawgPrivate(trie); 474 d = new QDawgPrivate(trie);
427 else 475 else
428 d = 0; 476 d = 0;
429 return TRUE; 477 return TRUE;
430} 478}
431 479
480/*!
481 Replaces all the DAWG's words with the words in the \a list.
482*/
432void QDawg::createFromWords(const QStringList& list) 483void QDawg::createFromWords(const QStringList& list)
433{ 484{
434 delete d; 485 delete d;
435 486
436 if ( list.count() ) { 487 if ( list.count() ) {
437 QTrie* trie = new QTrie; 488 QTrie* trie = new QTrie;
438 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { 489 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) {
439 trie->insertWord(*it); 490 trie->insertWord(*it);
440 } 491 }
441 d = new QDawgPrivate(trie); 492 d = new QDawgPrivate(trie);
442 } else { 493 } else {
443 d = 0; 494 d = 0;
444 } 495 }
445} 496}
446 497
498/*!
499 Returns a list of all the words in the DAWG.
500*/
447QStringList QDawg::allWords() const 501QStringList QDawg::allWords() const
448{ 502{
449 QStringList result; 503 QStringList result;
450 if ( d ) d->appendAllWords(result); 504 if ( d ) d->appendAllWords(result);
451 return result; 505 return result;
452} 506}
453 507
454 508
509/*!
510 Replaces the DAWG with the DAWG in \a filename.
511 The file is memory-mapped.
512
513 \sa write()
514*/
455bool QDawg::readFile(const QString& filename) 515bool QDawg::readFile(const QString& filename)
456{ 516{
457 delete d; 517 delete d;
458 d = 0; 518 d = 0;
459 int f = ::open( QFile::encodeName(filename), O_RDONLY ); 519 int f = ::open( QFile::encodeName(filename), O_RDONLY );
460 if ( f < 0 ) 520 if ( f < 0 )
461 return FALSE; 521 return FALSE;
462 struct stat st; 522 struct stat st;
463 if ( !fstat( f, &st ) ) { 523 if ( !fstat( f, &st ) ) {
464 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file 524 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file
465 PROT_READ, // read-only memory 525 PROT_READ, // read-only memory
466 MAP_FILE | MAP_PRIVATE, // swap-backed map from file 526 MAP_FILE | MAP_PRIVATE, // swap-backed map from file
467 f, 0 ); // from offset 0 of f 527 f, 0 ); // from offset 0 of f
468 if ( tmp && tmp != (char*)MAP_FAILED ) 528 if ( tmp && tmp != (char*)MAP_FAILED )
469 d = new QDawgPrivate((uchar*)tmp); 529 d = new QDawgPrivate((uchar*)tmp);
470 } 530 }
471 ::close( f ); 531 ::close( f );
472 return d; 532 return d;
473} 533}
474 534
535/*!
536 Replaces the DAWG with the DAWG in \a dev.
537 The file is memory-mapped.
538
539 \sa write()
540*/
475bool QDawg::read(QIODevice* dev) 541bool QDawg::read(QIODevice* dev)
476{ 542{
477 delete d; 543 delete d;
478 d = new QDawgPrivate(dev); 544 d = new QDawgPrivate(dev);
479 if ( d->ok() ) 545 if ( d->ok() )
480 return TRUE; 546 return TRUE;
481 delete d; 547 delete d;
482 d = 0; 548 d = 0;
483 return FALSE; 549 return FALSE;
484} 550}
485 551
552/*!
553 Writes the DAWG to \a dev, in a custom QDAWG format.
554*/
486bool QDawg::write(QIODevice* dev) const 555bool QDawg::write(QIODevice* dev) const
487{ 556{
488 return d ? d->write(dev) : TRUE; 557 return d ? d->write(dev) : TRUE;
489} 558}
490 559
560/*!
561 Returns the number of words in the DAWG.
562*/
491int QDawg::countWords() const 563int QDawg::countWords() const
492{ 564{
493 return d ? d->countWords() : 0; 565 return d ? d->countWords() : 0;
494} 566}
495 567
568/*!
569 Returns the root \link qdawg-node.html Node\endlink of the DAWG.
570*/
496const QDawg::Node* QDawg::root() const 571const QDawg::Node* QDawg::root() const
497{ 572{
498 return d ? d->root() : 0; 573 return d ? d->root() : 0;
499} 574}
500 575
576/*!
577 Returns TRUE if the DAWG contains the word \a s; otherwise returns
578 FALSE.
579*/
501bool QDawg::contains(const QString& s) const 580bool QDawg::contains(const QString& s) const
502{ 581{
503 return d ? d->contains(s) : FALSE; 582 return d ? d->contains(s) : FALSE;
504} 583}
505 584
585/*!
586 \internal
587
588 For debugging: prints out the DAWG contents.
589*/
506void QDawg::dump() const 590void QDawg::dump() const
507{ 591{
508 if ( d ) d->dump(); 592 if ( d ) d->dump();
509} 593}
510 594
595/*!
596 \class QDawg::Node qdawg.h
597 \brief The QDawg::Node class represents one node of a QDawg.
598*/
599
600/*!
601 \fn QChar QDawg::Node::letter() const
602
603 Returns this Node's letter.
604*/
605/*!
606 \fn bool QDawg::Node::isWord() const
607
608 Returns TRUE if this Node is the end of a word; otherwise returns
609 FALSE.
610*/
611/*!
612 \fn bool QDawg::Node::isLast() const
613
614 Returns TRUE if this Node is the last in the child list; otherwise
615 returns FALSE.
616*/
617/*!
618 \fn const Node* QDawg::Node::next() const
619
620 Returns the next child Node in the child list or 0 if the current
621 Node isLast().
622*/
623/*!
624 \fn const Node* QDawg::Node::jump() const
625
626 Returns the node connected to this Node.
627*/