summaryrefslogtreecommitdiff
path: root/inputmethods/dasher/LanguageModel.h
blob: 8ffe1009957f4138eeca4828bf90f78b542f338a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// LanguageModel.h
//
/////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2001-2002 David Ward
//
/////////////////////////////////////////////////////////////////////////////

// Abstract language model class
// See PPMModel for an example implementation

// Contexts are indentified by a unique unsigned integer

#ifndef __LanguageModel_h__
#define __LanguageModel_h__

#include "MSVC_Unannoy.h"
#include <vector>
#include <string>

#include "Alphabet.h"
#include "Context.h"

namespace Dasher {class CLanguageModel;}
class Dasher::CLanguageModel
{
public:
	CLanguageModel(CAlphabet* Alphabet, int Normalization);

	// Interface for the Dasher code
	// --------------------------------------------------------------------------
	class CNodeContext {
	public:
		CNodeContext() {};
		virtual ~CNodeContext() {};
	};
	
	// return the model's normalization - what the probabilities sum to
	const int normalization() const { return m_iNorm;}
	
	CNodeContext* GetRootNodeContext();
	CNodeContext* CloneNodeContext(CNodeContext* NodeContext);
	void ReleaseNodeContext(CNodeContext* NodeContext);
	void EnterNodeSymbol(CNodeContext* NodeContext, symbol Symbol);
	void LearnNodeSymbol(CNodeContext* NodeContext, symbol Symbol);
	void EnterText(CNodeContext* NodeContext, std::string TheText);
	void LearnText(CNodeContext* NodeContext, std::string* TheText, bool IsMore);
	bool GetNodeProbs(CNodeContext* Context, std::vector<symbol> &NewSymbols,
		std::vector<unsigned int> &Groups, std::vector<unsigned int> &Probs, double AddProb);
	
	// Alphabet pass-through functions for widely needed information
	symbol GetSpaceSymbol() {return m_Alphabet->GetSpaceSymbol();}
	
	int GetColour(int character);

protected:
	int GetNumberModelChars() {return m_Alphabet->GetNumberSymbols();}
	
	// Generic language model functions to be implemented 
	// --------------------------------------------------------------------------
	typedef unsigned int modelchar;
	
	// return the id for the root context:
	virtual CContext* GetRootContext()=0;
	// clone a context and return the new id:
	virtual CContext* CloneContext(CContext*)=0;
	// delete a context:
	virtual void ReleaseContext(CContext*)=0;
	// diagnostic info:
	virtual void dump()=0;
	// add character to the language model:
	virtual void LearnSymbol(CContext* Context, modelchar Symbol)=0;
	// update context with a character:
	virtual void EnterSymbol(CContext* context, modelchar Symbol)=0;
	// get the probability distrubution at the given context:
	virtual bool GetProbs(CContext* Context, std::vector<unsigned int> &Probs, double AddProb)=0;
	
private:
	CAlphabet *m_Alphabet;
	int m_iModelChars; // number of charater in the model 1...ModelChars
	int m_iNorm;       // normalization of probabilities
};

using namespace Dasher;

///////////////////////////////////////////////////////////////////

inline CLanguageModel::CNodeContext* CLanguageModel::GetRootNodeContext()
{
	return (CNodeContext*) GetRootContext();
}

///////////////////////////////////////////////////////////////////

inline CLanguageModel::CNodeContext* CLanguageModel::CloneNodeContext(CNodeContext* NodeContext)
{
	return (CNodeContext*) CloneContext((CContext*) NodeContext);
}

///////////////////////////////////////////////////////////////////

inline void CLanguageModel::ReleaseNodeContext(CNodeContext* NodeContext)
{
	ReleaseContext((CContext*) NodeContext);
}

///////////////////////////////////////////////////////////////////

inline void CLanguageModel::EnterNodeSymbol(CNodeContext* NodeContext, symbol Symbol)
{
	EnterSymbol((CContext*) NodeContext, (modelchar) Symbol);
}

///////////////////////////////////////////////////////////////////

inline void CLanguageModel::LearnNodeSymbol(CNodeContext* NodeContext, symbol Symbol)
{
	LearnSymbol((CContext*) NodeContext, (modelchar) Symbol);
}



#endif /* #ifndef __LanguageModel_h__ */