Diffstat (limited to 'inputmethods/dasher/AlphabetMap.h') (more/less context) (ignore whitespace changes)
-rw-r--r-- | inputmethods/dasher/AlphabetMap.h | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/inputmethods/dasher/AlphabetMap.h b/inputmethods/dasher/AlphabetMap.h new file mode 100644 index 0000000..3aac1f5 --- a/dev/null +++ b/inputmethods/dasher/AlphabetMap.h | |||
@@ -0,0 +1,111 @@ | |||
1 | // AlphabetMap.h | ||
2 | // | ||
3 | ///////////////////////////////////////////////////////////////////////////// | ||
4 | // | ||
5 | // Copyright (c) 2002 Iain Murray | ||
6 | // | ||
7 | ///////////////////////////////////////////////////////////////////////////// | ||
8 | |||
9 | |||
10 | /* | ||
11 | If I were just using GCC, which comes with the CGI "STL" implementation, I would | ||
12 | use hash_map (which isn't part of the ANSI/ISO standard C++ STL, but hey it's nice). | ||
13 | Using a plain map is just too slow for training on large files (or it is with certain | ||
14 | STL implementations). I'm sure training could be made much faster still, but that's | ||
15 | another matter... | ||
16 | |||
17 | While I could (and probably should) get a hash_map for VC++ from | ||
18 | http://www.stlport.org I thought it would be nicer if people didn't have | ||
19 | to download extra stuff and then have to get it working alongside the STL | ||
20 | with VC++, especially for just one small part of Dasher. | ||
21 | |||
22 | The result is this: | ||
23 | *************************************************** | ||
24 | very much thrown together to get Dasher out ASAP. | ||
25 | *************************************************** | ||
26 | It is deliberately not like an STL container. | ||
27 | However, as it has a tiny interface, it should still be easy to replace. | ||
28 | Sorry if this seems really unprofressional. | ||
29 | |||
30 | Replacing it might be a good idea. On the other hand it could be customised | ||
31 | to the needs of the alphabet, so that it works faster. For example, | ||
32 | currently if I have a string "asdf", it might be that "a" is checked | ||
33 | then "as" is checked then "asd" is checked. I shouldn't need to keep | ||
34 | rehashing the leading characters. I plan to fix that here. Doing so with | ||
35 | a standard hash_map would be hard. | ||
36 | |||
37 | |||
38 | Usage: | ||
39 | alphabet_map MyMap(NumberOfEntriesWeExpect); // Can omit NumberOfEntriesWeExpect | ||
40 | MyMap.add("asdf", 15); | ||
41 | symbol i = MyMap.get("asdf") // i=15 | ||
42 | symbol j = MyMap.get("fdsa") // j=0 | ||
43 | |||
44 | You can't remove items once they are added as Dasher has no need for that. | ||
45 | |||
46 | IAM 08/2002 | ||
47 | */ | ||
48 | |||
49 | #ifndef __AlphabetMap_h__ | ||
50 | #define __AlphabetMap_h__ | ||
51 | |||
52 | #include "MSVC_Unannoy.h" | ||
53 | #include <vector> | ||
54 | #include <string> | ||
55 | |||
56 | #include "DasherTypes.h" | ||
57 | |||
58 | namespace Dasher {class alphabet_map;} | ||
59 | class Dasher::alphabet_map | ||
60 | { | ||
61 | public: | ||
62 | alphabet_map(uint InitialTableSize=255); | ||
63 | void Add(const std::string& Key, symbol Value); | ||
64 | symbol Get(const std::string& Key, bool* KeyIsPrefix=0) const; | ||
65 | |||
66 | private: | ||
67 | class Entry | ||
68 | { | ||
69 | public: | ||
70 | Entry(std::string Key, symbol Symbol, Entry* Next) | ||
71 | : Key(Key), Symbol(Symbol), Next(Next), KeyIsPrefix(false) {} | ||
72 | |||
73 | std::string Key; | ||
74 | bool KeyIsPrefix; | ||
75 | symbol Symbol; | ||
76 | Entry* Next; | ||
77 | }; | ||
78 | |||
79 | void RecursiveAdd(const std::string& Key, symbol Value, bool PrefixFlag); | ||
80 | |||
81 | // A standard hash -- could try and research something specific. | ||
82 | inline uint Hash(const std::string& Input) const { | ||
83 | uint Result = 0; | ||
84 | |||
85 | typedef std::string::const_iterator CI; | ||
86 | CI Cur = Input.begin(); | ||
87 | CI end = Input.end(); | ||
88 | |||
89 | while (Cur!=end) Result = (Result<<1)^*Cur++; | ||
90 | Result %= HashTable.size(); | ||
91 | |||
92 | return Result; | ||
93 | /* | ||
94 | if (Input.size()==1) // Speedup for ASCII text | ||
95 | return Input[0]; | ||
96 | |||
97 | for (int i=0; i<Input.size(); i++) | ||
98 | Result = (Result<<1)^Input[i]; | ||
99 | |||
100 | |||
101 | return Result%HashTable.size(); | ||
102 | */ | ||
103 | } | ||
104 | |||
105 | std::vector<Entry> Entries; | ||
106 | std::vector<Entry*> HashTable; | ||
107 | const symbol Undefined; | ||
108 | }; | ||
109 | |||
110 | |||
111 | #endif /* #ifndef __AlphabetMap_h__ */ | ||