-rw-r--r-- | library/qdawg.cpp | 123 |
1 files changed, 120 insertions, 3 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp index 3615693..af5dc82 100644 --- a/library/qdawg.cpp +++ b/library/qdawg.cpp @@ -1,28 +1,28 @@ /********************************************************************** -** Copyright (C) 2000 Trolltech AS. All rights reserved. +** Copyright (C) 2000-2002 Trolltech AS. All rights reserved. ** -** This file is part of Qtopia Environment. +** This file is part of the Qtopia Environment. ** ** This file may be distributed and/or modified under the terms of the ** GNU General Public License version 2 as published by the Free Software ** Foundation and appearing in the file LICENSE.GPL included in the ** packaging of this file. ** ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. ** ** See http://www.trolltech.com/gpl/ for GPL licensing information. ** ** Contact info@trolltech.com if any conditions of this licensing are ** not clear to you. ** **********************************************************************/ #include "qdawg.h" #include <qintdict.h> #include <qvaluelist.h> #include <qtextstream.h> #include <qfile.h> #include <qtl.h> #include <limits.h> #include <stdio.h> @@ -175,49 +175,49 @@ QTrie* QTrie::clubLeader(TrieClubDirectory& directory) } return this; } int QTrie::collectKeys() { int n=0; if ( key ) key=0,n+=children.count(); for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) n += (*it).p->collectKeys(); return n; } int TriePtr::operator <(const TriePtr& o) const { return letter < o.letter; } int TriePtr::operator >(const TriePtr& o) const { return letter > o.letter; } int TriePtr::operator <=(const TriePtr& o) const { return letter <= o.letter; } bool TrieList::equal(TrieList& l) { if ( count() != l.count() ) return FALSE; - sort(); l.sort(); + sort(); l.sort(); ConstIterator it2 = begin(); ConstIterator it = l.begin(); for( ; it != l.end(); ++it, ++it2 ) if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) return FALSE; return TRUE; } QTrie* TrieList::findAdd(QChar c) { for (Iterator it=begin(); it!=end(); ++it) { if ( (*it).letter == c ) return (*it).p; } TriePtr p; p.p = new QTrie; p.letter = c; prepend(p); sorted=FALSE; sort(); return p.p; } static const char* dawg_sig = "QDAWG100"; @@ -379,132 +379,249 @@ private: QTrie* s = (*it).p; ++n; n->let = (*it).letter.unicode(); n->isword = s->isword; n->islast = 0; n->offset = appendToArray(s); if ( n->offset ) { int t = n->offset-here; n->offset=t; if ( n->offset != t ) qWarning("Overflow: too many words"); } here++; } n->islast = 1; } return t->key; } private: int nodes; QDawg::Node *node; }; +/*! + \class QDawg qdawg.h + \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph. + A DAWG provides very fast look-up of words in a word list. + + The word list is created using readFile(), read() or + createFromWords(). A list of all the DAWG's words is returned by + allWords(), and the total number of words is returned by + countWords(). Use contains() to see if a particular word is in the + DAWG. The root \link qdawg-node.html node\endlink is returned by root(). + + A global DAWG is maintained for the current locale. See the + \l Global class for details. + + The structure of a DAWG is a graph of \link qdawg-node.html + Nodes\endlink. There are no cycles in the graph (since there are no + inifinitely repeating words). Each \link qdawg-node.html + Node\endlink is a member of a list of \link qdawg-node.html + Nodes\endlink called a child list. Each \link qdawg-node.html + Node\endlink in the child list has a \e letter, an \e isWord flag, + at most one \e jump arc, and at most one arc to the next child in + the list. + + If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG, + starting from the root(), and you concatenate all the letters from + the single child in each child list that you visit, at every \link + qdawg-node.html Node\endlink which has the isWord flag set your + concatenation will be a word in the list represented by the DAWG. + + For example, the DAWG below represents the word list: + ban, band, can, cane, cans, pan, pane, pans. + + This structuring not only provides O(1) lookup of words in the word list, + but also produces a smaller storage file than a plain text file word list. + + \img qdawg.png +*/ + +/*! + Constructs a new empty DAWG. +*/ QDawg::QDawg() { d = 0; } +/*! + Deletes the DAWG. +*/ QDawg::~QDawg() { delete d; } +/*! + \overload + Replaces all the DAWG's words with words read from \a dev. +*/ bool QDawg::createFromWords(QIODevice* dev) { delete d; QTextStream i(dev); QTrie* trie = new QTrie; int n=0; while (!i.atEnd()) { trie->insertWord(QString::fromUtf8(i.readLine())); n++; } if ( n ) d = new QDawgPrivate(trie); else d = 0; return TRUE; } +/*! + Replaces all the DAWG's words with the words in the \a list. +*/ void QDawg::createFromWords(const QStringList& list) { delete d; if ( list.count() ) { QTrie* trie = new QTrie; for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { trie->insertWord(*it); } d = new QDawgPrivate(trie); } else { d = 0; } } +/*! + Returns a list of all the words in the DAWG. +*/ QStringList QDawg::allWords() const { QStringList result; if ( d ) d->appendAllWords(result); return result; } +/*! + Replaces the DAWG with the DAWG in \a filename. + The file is memory-mapped. + + \sa write() +*/ bool QDawg::readFile(const QString& filename) { delete d; d = 0; int f = ::open( QFile::encodeName(filename), O_RDONLY ); if ( f < 0 ) return FALSE; struct stat st; if ( !fstat( f, &st ) ) { char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file PROT_READ, // read-only memory MAP_FILE | MAP_PRIVATE, // swap-backed map from file f, 0 ); // from offset 0 of f if ( tmp && tmp != (char*)MAP_FAILED ) d = new QDawgPrivate((uchar*)tmp); } ::close( f ); return d; } +/*! + Replaces the DAWG with the DAWG in \a dev. + The file is memory-mapped. + + \sa write() +*/ bool QDawg::read(QIODevice* dev) { delete d; d = new QDawgPrivate(dev); if ( d->ok() ) return TRUE; delete d; d = 0; return FALSE; } +/*! + Writes the DAWG to \a dev, in a custom QDAWG format. +*/ bool QDawg::write(QIODevice* dev) const { return d ? d->write(dev) : TRUE; } +/*! + Returns the number of words in the DAWG. +*/ int QDawg::countWords() const { return d ? d->countWords() : 0; } +/*! + Returns the root \link qdawg-node.html Node\endlink of the DAWG. +*/ const QDawg::Node* QDawg::root() const { return d ? d->root() : 0; } +/*! + Returns TRUE if the DAWG contains the word \a s; otherwise returns + FALSE. +*/ bool QDawg::contains(const QString& s) const { return d ? d->contains(s) : FALSE; } +/*! + \internal + + For debugging: prints out the DAWG contents. +*/ void QDawg::dump() const { if ( d ) d->dump(); } +/*! + \class QDawg::Node qdawg.h + \brief The QDawg::Node class represents one node of a QDawg. +*/ + +/*! + \fn QChar QDawg::Node::letter() const + + Returns this Node's letter. +*/ +/*! + \fn bool QDawg::Node::isWord() const + + Returns TRUE if this Node is the end of a word; otherwise returns + FALSE. +*/ +/*! + \fn bool QDawg::Node::isLast() const + + Returns TRUE if this Node is the last in the child list; otherwise + returns FALSE. +*/ +/*! + \fn const Node* QDawg::Node::next() const + + Returns the next child Node in the child list or 0 if the current + Node isLast(). +*/ +/*! + \fn const Node* QDawg::Node::jump() const + + Returns the node connected to this Node. +*/ |