1 files changed, 111 insertions, 0 deletions
diff --git a/inputmethods/dasher/AlphabetMap.h b/inputmethods/dasher/AlphabetMap.h
new file mode 100644
index 0000000..3aac1f5
--- a/dev/null
+++ b/inputmethods/dasher/AlphabetMap.h
@@ -0,0 +1,111 @@
+// AlphabetMap.h
+//
+/////////////////////////////////////////////////////////////////////////////
+//
+// Copyright (c) 2002 Iain Murray
+//
+/////////////////////////////////////////////////////////////////////////////
+/*
+If I were just using GCC, which comes with the CGI "STL" implementation, I would
+use hash_map (which isn't part of the ANSI/ISO standard C++ STL, but hey it's nice).
+Using a plain map is just too slow for training on large files (or it is with certain
+STL implementations). I'm sure training could be made much faster still, but that's
+another matter...
+While I could (and probably should) get a hash_map for VC++ from
+http://www.stlport.org I thought it would be nicer if people didn't have
+to download extra stuff and then have to get it working alongside the STL
+with VC++, especially for just one small part of Dasher.
+The result is this:
+***************************************************
+very much thrown together to get Dasher out ASAP.
+***************************************************
+It is deliberately not like an STL container.
+However, as it has a tiny interface, it should still be easy to replace.
+Sorry if this seems really unprofressional.
+Replacing it might be a good idea. On the other hand it could be customised
+to the needs of the alphabet, so that it works faster. For example,
+currently if I have a string "asdf", it might be that "a" is checked
+then "as" is checked then "asd" is checked. I shouldn't need to keep
+rehashing the leading characters. I plan to fix that here. Doing so with
+a standard hash_map would be hard.
+Usage:
+alphabet_map MyMap(NumberOfEntriesWeExpect); // Can omit NumberOfEntriesWeExpect
+MyMap.add("asdf", 15);
+symbol i = MyMap.get("asdf") // i=15
+symbol j = MyMap.get("fdsa") // j=0
+You can't remove items once they are added as Dasher has no need for that.
+IAM 08/2002
+*/
+#ifndef __AlphabetMap_h__
+#define __AlphabetMap_h__
+#include "MSVC_Unannoy.h"
+#include <vector>
+#include <string>
+#include "DasherTypes.h"
+namespace Dasher {class alphabet_map;}
+class Dasher::alphabet_map
+{
+public:
+        alphabet_map(uint InitialTableSize=255);
+        void Add(const std::string& Key, symbol Value);
+        symbol Get(const std::string& Key, bool* KeyIsPrefix=0) const;
+        
+private:
+        class Entry
+        {
+        public:
+                Entry(std::string Key, symbol Symbol, Entry* Next)
+                        : Key(Key), Symbol(Symbol), Next(Next), KeyIsPrefix(false) {}
+                
+                std::string Key;
+                bool KeyIsPrefix;
+                symbol Symbol;
+                Entry* Next;
+        };
+        
+        void RecursiveAdd(const std::string& Key, symbol Value, bool PrefixFlag);
+        
+        // A standard hash -- could try and research something specific.
+        inline uint Hash(const std::string& Input) const {
+                uint Result = 0;
+        
+                typedef std::string::const_iterator CI;
+                CI Cur = Input.begin();
+                CI end = Input.end();
+                
+                while (Cur!=end) Result = (Result<<1)^*Cur++;
+                Result %= HashTable.size();
+                
+                return Result;
+                /*
+                if (Input.size()==1) // Speedup for ASCII text
+                        return Input[0];
+                
+                for (int i=0; i<Input.size(); i++)
+                        Result = (Result<<1)^Input[i];
+                
+                return Result%HashTable.size();
+                */
+        }
+        
+        std::vector<Entry> Entries;
+        std::vector<Entry*> HashTable;
+        const symbol Undefined;
+};
+#endif /* #ifndef __AlphabetMap_h__ */

diff --git a/inputmethods/dasher/AlphabetMap.h b/inputmethods/dasher/AlphabetMap.h new file mode 100644 index 0000000..3aac1f5 --- a/dev/null +++ b/inputmethods/dasher/AlphabetMap.h
@@ -0,0 +1,111 @@
	1	// AlphabetMap.h
	2	//
	3	/////////////////////////////////////////////////////////////////////////////
	4	//
	5	// Copyright (c) 2002 Iain Murray
	6	//
	7	/////////////////////////////////////////////////////////////////////////////
	8
	9
	10	/*
	11	If I were just using GCC, which comes with the CGI "STL" implementation, I would
	12	use hash_map (which isn't part of the ANSI/ISO standard C++ STL, but hey it's nice).
	13	Using a plain map is just too slow for training on large files (or it is with certain
	14	STL implementations). I'm sure training could be made much faster still, but that's
	15	another matter...
	16
	17	While I could (and probably should) get a hash_map for VC++ from
	18	http://www.stlport.org I thought it would be nicer if people didn't have
	19	to download extra stuff and then have to get it working alongside the STL
	20	with VC++, especially for just one small part of Dasher.
	21
	22	The result is this:
	23	***************************************************
	24	very much thrown together to get Dasher out ASAP.
	25	***************************************************
	26	It is deliberately not like an STL container.
	27	However, as it has a tiny interface, it should still be easy to replace.
	28	Sorry if this seems really unprofressional.
	29
	30	Replacing it might be a good idea. On the other hand it could be customised
	31	to the needs of the alphabet, so that it works faster. For example,
	32	currently if I have a string "asdf", it might be that "a" is checked
	33	then "as" is checked then "asd" is checked. I shouldn't need to keep
	34	rehashing the leading characters. I plan to fix that here. Doing so with
	35	a standard hash_map would be hard.
	36
	37
	38	Usage:
	39	alphabet_map MyMap(NumberOfEntriesWeExpect); // Can omit NumberOfEntriesWeExpect
	40	MyMap.add("asdf", 15);
	41	symbol i = MyMap.get("asdf") // i=15
	42	symbol j = MyMap.get("fdsa") // j=0
	43
	44	You can't remove items once they are added as Dasher has no need for that.
	45
	46	IAM 08/2002
	47	*/
	48
	49	#ifndef __AlphabetMap_h__
	50	#define __AlphabetMap_h__
	51
	52	#include "MSVC_Unannoy.h"
	53	#include <vector>
	54	#include <string>
	55
	56	#include "DasherTypes.h"
	57
	58	namespace Dasher {class alphabet_map;}
	59	class Dasher::alphabet_map
	60	{
	61	public:
	62	alphabet_map(uint InitialTableSize=255);
	63	void Add(const std::string& Key, symbol Value);
	64	symbol Get(const std::string& Key, bool* KeyIsPrefix=0) const;
	65
	66	private:
	67	class Entry
	68	{
	69	public:
	70	Entry(std::string Key, symbol Symbol, Entry* Next)
	71	: Key(Key), Symbol(Symbol), Next(Next), KeyIsPrefix(false) {}
	72
	73	std::string Key;
	74	bool KeyIsPrefix;
	75	symbol Symbol;
	76	Entry* Next;
	77	};
	78
	79	void RecursiveAdd(const std::string& Key, symbol Value, bool PrefixFlag);
	80
	81	// A standard hash -- could try and research something specific.
	82	inline uint Hash(const std::string& Input) const {
	83	uint Result = 0;
	84
	85	typedef std::string::const_iterator CI;
	86	CI Cur = Input.begin();
	87	CI end = Input.end();
	88
	89	while (Cur!=end) Result = (Result<<1)^*Cur++;
	90	Result %= HashTable.size();
	91
	92	return Result;
	93	/*
	94	if (Input.size()==1) // Speedup for ASCII text
	95	return Input[0];
	96
	97	for (int i=0; i<Input.size(); i++)
	98	Result = (Result<<1)^Input[i];
	99
	100
	101	return Result%HashTable.size();
	102	*/
	103	}
	104
	105	std::vector<Entry> Entries;
	106	std::vector<Entry*> HashTable;
	107	const symbol Undefined;
	108	};
	109
	110
	111	#endif /* #ifndef __AlphabetMap_h__ */