summaryrefslogtreecommitdiff
path: root/inputmethods/dasher/AlphabetMap.h
Unidiff
Diffstat (limited to 'inputmethods/dasher/AlphabetMap.h') (more/less context) (ignore whitespace changes)
-rw-r--r--inputmethods/dasher/AlphabetMap.h111
1 files changed, 111 insertions, 0 deletions
diff --git a/inputmethods/dasher/AlphabetMap.h b/inputmethods/dasher/AlphabetMap.h
new file mode 100644
index 0000000..3aac1f5
--- a/dev/null
+++ b/inputmethods/dasher/AlphabetMap.h
@@ -0,0 +1,111 @@
1// AlphabetMap.h
2//
3/////////////////////////////////////////////////////////////////////////////
4//
5// Copyright (c) 2002 Iain Murray
6//
7/////////////////////////////////////////////////////////////////////////////
8
9
10/*
11If I were just using GCC, which comes with the CGI "STL" implementation, I would
12use hash_map (which isn't part of the ANSI/ISO standard C++ STL, but hey it's nice).
13Using a plain map is just too slow for training on large files (or it is with certain
14STL implementations). I'm sure training could be made much faster still, but that's
15another matter...
16
17While I could (and probably should) get a hash_map for VC++ from
18http://www.stlport.org I thought it would be nicer if people didn't have
19to download extra stuff and then have to get it working alongside the STL
20with VC++, especially for just one small part of Dasher.
21
22The result is this:
23***************************************************
24very much thrown together to get Dasher out ASAP.
25***************************************************
26It is deliberately not like an STL container.
27However, as it has a tiny interface, it should still be easy to replace.
28Sorry if this seems really unprofressional.
29
30Replacing it might be a good idea. On the other hand it could be customised
31to the needs of the alphabet, so that it works faster. For example,
32currently if I have a string "asdf", it might be that "a" is checked
33then "as" is checked then "asd" is checked. I shouldn't need to keep
34rehashing the leading characters. I plan to fix that here. Doing so with
35a standard hash_map would be hard.
36
37
38Usage:
39alphabet_map MyMap(NumberOfEntriesWeExpect); // Can omit NumberOfEntriesWeExpect
40MyMap.add("asdf", 15);
41symbol i = MyMap.get("asdf") // i=15
42symbol j = MyMap.get("fdsa") // j=0
43
44You can't remove items once they are added as Dasher has no need for that.
45
46IAM 08/2002
47*/
48
49#ifndef __AlphabetMap_h__
50#define __AlphabetMap_h__
51
52#include "MSVC_Unannoy.h"
53#include <vector>
54#include <string>
55
56#include "DasherTypes.h"
57
58namespace Dasher {class alphabet_map;}
59class Dasher::alphabet_map
60{
61public:
62 alphabet_map(uint InitialTableSize=255);
63 void Add(const std::string& Key, symbol Value);
64 symbol Get(const std::string& Key, bool* KeyIsPrefix=0) const;
65
66private:
67 class Entry
68 {
69 public:
70 Entry(std::string Key, symbol Symbol, Entry* Next)
71 : Key(Key), Symbol(Symbol), Next(Next), KeyIsPrefix(false) {}
72
73 std::string Key;
74 bool KeyIsPrefix;
75 symbol Symbol;
76 Entry* Next;
77 };
78
79 void RecursiveAdd(const std::string& Key, symbol Value, bool PrefixFlag);
80
81 // A standard hash -- could try and research something specific.
82 inline uint Hash(const std::string& Input) const {
83 uint Result = 0;
84
85 typedef std::string::const_iterator CI;
86 CI Cur = Input.begin();
87 CI end = Input.end();
88
89 while (Cur!=end) Result = (Result<<1)^*Cur++;
90 Result %= HashTable.size();
91
92 return Result;
93 /*
94 if (Input.size()==1) // Speedup for ASCII text
95 return Input[0];
96
97 for (int i=0; i<Input.size(); i++)
98 Result = (Result<<1)^Input[i];
99
100
101 return Result%HashTable.size();
102 */
103 }
104
105 std::vector<Entry> Entries;
106 std::vector<Entry*> HashTable;
107 const symbol Undefined;
108};
109
110
111#endif /* #ifndef __AlphabetMap_h__ */