summaryrefslogtreecommitdiff
path: root/library/qdawg.cpp
Unidiff
Diffstat (limited to 'library/qdawg.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--library/qdawg.cpp123
1 files changed, 120 insertions, 3 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp
index 3615693..af5dc82 100644
--- a/library/qdawg.cpp
+++ b/library/qdawg.cpp
@@ -1,16 +1,16 @@
1/********************************************************************** 1/**********************************************************************
2** Copyright (C) 2000 Trolltech AS. All rights reserved. 2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
3** 3**
4** This file is part of Qtopia Environment. 4** This file is part of the Qtopia Environment.
5** 5**
6** This file may be distributed and/or modified under the terms of the 6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software 7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the 8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file. 9** packaging of this file.
10** 10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE 11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13** 13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information. 14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15** 15**
16** Contact info@trolltech.com if any conditions of this licensing are 16** Contact info@trolltech.com if any conditions of this licensing are
@@ -187,25 +187,25 @@ int QTrie::collectKeys()
187 187
188int TriePtr::operator <(const TriePtr& o) const 188int TriePtr::operator <(const TriePtr& o) const
189 { return letter < o.letter; } 189 { return letter < o.letter; }
190int TriePtr::operator >(const TriePtr& o) const 190int TriePtr::operator >(const TriePtr& o) const
191 { return letter > o.letter; } 191 { return letter > o.letter; }
192int TriePtr::operator <=(const TriePtr& o) const 192int TriePtr::operator <=(const TriePtr& o) const
193 { return letter <= o.letter; } 193 { return letter <= o.letter; }
194 194
195bool TrieList::equal(TrieList& l) 195bool TrieList::equal(TrieList& l)
196{ 196{
197 if ( count() != l.count() ) 197 if ( count() != l.count() )
198 return FALSE; 198 return FALSE;
199 sort(); l.sort(); 199 sort(); l.sort();
200 ConstIterator it2 = begin(); 200 ConstIterator it2 = begin();
201 ConstIterator it = l.begin(); 201 ConstIterator it = l.begin();
202 for( ; it != l.end(); ++it, ++it2 ) 202 for( ; it != l.end(); ++it, ++it2 )
203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) 203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) )
204 return FALSE; 204 return FALSE;
205 return TRUE; 205 return TRUE;
206} 206}
207QTrie* TrieList::findAdd(QChar c) 207QTrie* TrieList::findAdd(QChar c)
208{ 208{
209 for (Iterator it=begin(); it!=end(); ++it) { 209 for (Iterator it=begin(); it!=end(); ++it) {
210 if ( (*it).letter == c ) 210 if ( (*it).letter == c )
211 return (*it).p; 211 return (*it).p;
@@ -391,120 +391,237 @@ private:
391 here++; 391 here++;
392 } 392 }
393 n->islast = 1; 393 n->islast = 1;
394 } 394 }
395 return t->key; 395 return t->key;
396 } 396 }
397 397
398private: 398private:
399 int nodes; 399 int nodes;
400 QDawg::Node *node; 400 QDawg::Node *node;
401}; 401};
402 402
403/*!
404 \class QDawg qdawg.h
405 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph.
403 406
407 A DAWG provides very fast look-up of words in a word list.
408
409 The word list is created using readFile(), read() or
410 createFromWords(). A list of all the DAWG's words is returned by
411 allWords(), and the total number of words is returned by
412 countWords(). Use contains() to see if a particular word is in the
413 DAWG. The root \link qdawg-node.html node\endlink is returned by root().
414
415 A global DAWG is maintained for the current locale. See the
416 \l Global class for details.
417
418 The structure of a DAWG is a graph of \link qdawg-node.html
419 Nodes\endlink. There are no cycles in the graph (since there are no
420 inifinitely repeating words). Each \link qdawg-node.html
421 Node\endlink is a member of a list of \link qdawg-node.html
422 Nodes\endlink called a child list. Each \link qdawg-node.html
423 Node\endlink in the child list has a \e letter, an \e isWord flag,
424 at most one \e jump arc, and at most one arc to the next child in
425 the list.
426
427 If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG,
428 starting from the root(), and you concatenate all the letters from
429 the single child in each child list that you visit, at every \link
430 qdawg-node.html Node\endlink which has the isWord flag set your
431 concatenation will be a word in the list represented by the DAWG.
432
433 For example, the DAWG below represents the word list:
434 ban, band, can, cane, cans, pan, pane, pans.
435
436 This structuring not only provides O(1) lookup of words in the word list,
437 but also produces a smaller storage file than a plain text file word list.
438
439 \img qdawg.png
440*/
441
442/*!
443 Constructs a new empty DAWG.
444*/
404QDawg::QDawg() 445QDawg::QDawg()
405{ 446{
406 d = 0; 447 d = 0;
407} 448}
408 449
450/*!
451 Deletes the DAWG.
452*/
409QDawg::~QDawg() 453QDawg::~QDawg()
410{ 454{
411 delete d; 455 delete d;
412} 456}
413 457
458/*!
459 \overload
460 Replaces all the DAWG's words with words read from \a dev.
461*/
414bool QDawg::createFromWords(QIODevice* dev) 462bool QDawg::createFromWords(QIODevice* dev)
415{ 463{
416 delete d; 464 delete d;
417 465
418 QTextStream i(dev); 466 QTextStream i(dev);
419 QTrie* trie = new QTrie; 467 QTrie* trie = new QTrie;
420 int n=0; 468 int n=0;
421 while (!i.atEnd()) { 469 while (!i.atEnd()) {
422 trie->insertWord(QString::fromUtf8(i.readLine())); 470 trie->insertWord(QString::fromUtf8(i.readLine()));
423 n++; 471 n++;
424 } 472 }
425 if ( n ) 473 if ( n )
426 d = new QDawgPrivate(trie); 474 d = new QDawgPrivate(trie);
427 else 475 else
428 d = 0; 476 d = 0;
429 return TRUE; 477 return TRUE;
430} 478}
431 479
480/*!
481 Replaces all the DAWG's words with the words in the \a list.
482*/
432void QDawg::createFromWords(const QStringList& list) 483void QDawg::createFromWords(const QStringList& list)
433{ 484{
434 delete d; 485 delete d;
435 486
436 if ( list.count() ) { 487 if ( list.count() ) {
437 QTrie* trie = new QTrie; 488 QTrie* trie = new QTrie;
438 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { 489 for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) {
439 trie->insertWord(*it); 490 trie->insertWord(*it);
440 } 491 }
441 d = new QDawgPrivate(trie); 492 d = new QDawgPrivate(trie);
442 } else { 493 } else {
443 d = 0; 494 d = 0;
444 } 495 }
445} 496}
446 497
498/*!
499 Returns a list of all the words in the DAWG.
500*/
447QStringList QDawg::allWords() const 501QStringList QDawg::allWords() const
448{ 502{
449 QStringList result; 503 QStringList result;
450 if ( d ) d->appendAllWords(result); 504 if ( d ) d->appendAllWords(result);
451 return result; 505 return result;
452} 506}
453 507
454 508
509/*!
510 Replaces the DAWG with the DAWG in \a filename.
511 The file is memory-mapped.
512
513 \sa write()
514*/
455bool QDawg::readFile(const QString& filename) 515bool QDawg::readFile(const QString& filename)
456{ 516{
457 delete d; 517 delete d;
458 d = 0; 518 d = 0;
459 int f = ::open( QFile::encodeName(filename), O_RDONLY ); 519 int f = ::open( QFile::encodeName(filename), O_RDONLY );
460 if ( f < 0 ) 520 if ( f < 0 )
461 return FALSE; 521 return FALSE;
462 struct stat st; 522 struct stat st;
463 if ( !fstat( f, &st ) ) { 523 if ( !fstat( f, &st ) ) {
464 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file 524 char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file
465 PROT_READ, // read-only memory 525 PROT_READ, // read-only memory
466 MAP_FILE | MAP_PRIVATE, // swap-backed map from file 526 MAP_FILE | MAP_PRIVATE, // swap-backed map from file
467 f, 0 ); // from offset 0 of f 527 f, 0 ); // from offset 0 of f
468 if ( tmp && tmp != (char*)MAP_FAILED ) 528 if ( tmp && tmp != (char*)MAP_FAILED )
469 d = new QDawgPrivate((uchar*)tmp); 529 d = new QDawgPrivate((uchar*)tmp);
470 } 530 }
471 ::close( f ); 531 ::close( f );
472 return d; 532 return d;
473} 533}
474 534
535/*!
536 Replaces the DAWG with the DAWG in \a dev.
537 The file is memory-mapped.
538
539 \sa write()
540*/
475bool QDawg::read(QIODevice* dev) 541bool QDawg::read(QIODevice* dev)
476{ 542{
477 delete d; 543 delete d;
478 d = new QDawgPrivate(dev); 544 d = new QDawgPrivate(dev);
479 if ( d->ok() ) 545 if ( d->ok() )
480 return TRUE; 546 return TRUE;
481 delete d; 547 delete d;
482 d = 0; 548 d = 0;
483 return FALSE; 549 return FALSE;
484} 550}
485 551
552/*!
553 Writes the DAWG to \a dev, in a custom QDAWG format.
554*/
486bool QDawg::write(QIODevice* dev) const 555bool QDawg::write(QIODevice* dev) const
487{ 556{
488 return d ? d->write(dev) : TRUE; 557 return d ? d->write(dev) : TRUE;
489} 558}
490 559
560/*!
561 Returns the number of words in the DAWG.
562*/
491int QDawg::countWords() const 563int QDawg::countWords() const
492{ 564{
493 return d ? d->countWords() : 0; 565 return d ? d->countWords() : 0;
494} 566}
495 567
568/*!
569 Returns the root \link qdawg-node.html Node\endlink of the DAWG.
570*/
496const QDawg::Node* QDawg::root() const 571const QDawg::Node* QDawg::root() const
497{ 572{
498 return d ? d->root() : 0; 573 return d ? d->root() : 0;
499} 574}
500 575
576/*!
577 Returns TRUE if the DAWG contains the word \a s; otherwise returns
578 FALSE.
579*/
501bool QDawg::contains(const QString& s) const 580bool QDawg::contains(const QString& s) const
502{ 581{
503 return d ? d->contains(s) : FALSE; 582 return d ? d->contains(s) : FALSE;
504} 583}
505 584
585/*!
586 \internal
587
588 For debugging: prints out the DAWG contents.
589*/
506void QDawg::dump() const 590void QDawg::dump() const
507{ 591{
508 if ( d ) d->dump(); 592 if ( d ) d->dump();
509} 593}
510 594
595/*!
596 \class QDawg::Node qdawg.h
597 \brief The QDawg::Node class represents one node of a QDawg.
598*/
599
600/*!
601 \fn QChar QDawg::Node::letter() const
602
603 Returns this Node's letter.
604*/
605/*!
606 \fn bool QDawg::Node::isWord() const
607
608 Returns TRUE if this Node is the end of a word; otherwise returns
609 FALSE.
610*/
611/*!
612 \fn bool QDawg::Node::isLast() const
613
614 Returns TRUE if this Node is the last in the child list; otherwise
615 returns FALSE.
616*/
617/*!
618 \fn const Node* QDawg::Node::next() const
619
620 Returns the next child Node in the child list or 0 if the current
621 Node isLast().
622*/
623/*!
624 \fn const Node* QDawg::Node::jump() const
625
626 Returns the node connected to this Node.
627*/