summaryrefslogtreecommitdiff
path: root/inputmethods/dasher/PPMLanguageModel.h
blob: 7025a0aabe9a49fcb82f5b5ea1719663b30aee29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// PPMLanguageModel.h
//
/////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 1999-2002 David Ward
//
/////////////////////////////////////////////////////////////////////////////

#ifndef __PPMLanguageModel_h__
#define __PPMLanguageModel_h__

#include "NoClones.h"
#include "MSVC_Unannoy.h"
#include <vector>
#include <stdio.h>

#include "LanguageModel.h"

// static char dumpTrieStr[40000];

// File-level limits. Neither constant is referenced in the visible part of
// this header.
// NOTE(review): MAX_ORDER presumably bounds the context order used by the
// model implementation -- confirm against the matching .cpp file.
const int MAX_ORDER = 5;
// NOTE(review): the purpose of maxcont cannot be determined from this header
// -- confirm against the matching .cpp file.
const int maxcont = 200;

namespace Dasher {class CPPMLanguageModel;}

// Language model built on Dasher::CLanguageModel (privately inheriting
// NoClones). Contexts handed out by GetRootContext()/CloneContext() are
// heap-allocated CPPMContext objects; ReleaseContext() deletes them.
class Dasher::CPPMLanguageModel : public Dasher::CLanguageModel, private NoClones
{
public:
	CPPMLanguageModel(CAlphabet *_alphabet, int _normalization);
	virtual ~CPPMLanguageModel();

	// A node of the model's linked structure. The constructor (defined
	// inline further down in this header) clears the three links and starts
	// the count at 1.
	class CPPMnode {
	public:
		// Both lookups are defined out of line.
		CPPMnode *find_symbol(int sym);
		CPPMnode *add_symbol_to_node(int sym, int *update);

		// NOTE(review): presumably first-child / next-sibling / shorter-context
		// ("vine") links of a PPM trie -- confirm against the implementation.
		CPPMnode *child;
		CPPMnode *next;
		CPPMnode *vine;

		short int count;
		// Set once at construction; the int argument is narrowed to short.
		const short int symbol;

		CPPMnode(int sym);
	};

	// Context type of this model: a node pointer plus an order value.
	class CPPMContext : public CContext {
	public:
		// Copies the base sub-object and both fields of `input`.
		CPPMContext(CPPMContext const &input)
			: CContext(input), head(input.head), order(input.order) {}
		// With no arguments yields a context with a null head and order 0.
		CPPMContext(CPPMnode *_head = 0, int _order = 0)
			: head(_head), order(_order) {}
		~CPPMContext() {}

		// Diagnostic hook; its inline definition below is currently a no-op.
		void dump();

		CPPMnode *head;
		int order;
	};

	// Deletes a context object.
	void ReleaseContext(CContext *release);
	// Returns a new heap-allocated copy of the stored root context.
	CContext *GetRootContext();
	// Returns a new heap-allocated copy of `copythis`, which is treated as a
	// CPPMContext.
	inline CContext *CloneContext(CContext *copythis);
	void EnterSymbol(CContext *context, modelchar Symbol);
	//inline bool GetProbs(CContext*,std::vector<symbol> &newchars,std::vector<unsigned int> &groups,std::vector<unsigned int> &probs,double addprob);
	bool GetProbs(CContext *, std::vector<unsigned int> &Probs, double AddProb);

	void LearnSymbol(CContext *Context, modelchar Symbol);
	void dump();

private:
	// Template context that GetRootContext() copies.
	CPPMContext *m_rootcontext;
	CPPMnode *root;

	void AddSymbol(CPPMContext &context, int symbol);

	// Diagnostic helpers, defined out of line.
	void dumpSymbol(int symbol);
	void dumpString(char *str, int pos, int len);
	void dumpTrie(CPPMnode *t, int d);
};

////////////////////////////////////////////////////////////////////////
// Inline functions 
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////

// Build a node for symbol `sym`: all three links start null and the count
// starts at one. Initialisers are listed in the members' declaration order.
inline CPPMLanguageModel::CPPMnode::CPPMnode(int sym)
	: child(0), next(0), vine(0), count(1), symbol(sym)
{
}

///////////////////////////////////////////////////////////////////

// Diagnostic output for a context. Currently does nothing: the printing code
// below stays commented out until the headers it needs are sorted out.
inline void CPPMLanguageModel::CPPMContext::dump()
{
	// TODO uncomment this when headers sorted out
	//dchar debug[128];
	//Usprintf(debug,TEXT("head %x order %d\n"),head,order);
	//DebugOutput(debug);
}

///////////////////////////////////////////////////////////////////

// Hand out a fresh heap-allocated copy of the stored root context. The
// CPPMContext pointer converts implicitly to its CContext base on return.
inline CContext* CPPMLanguageModel::GetRootContext()
{
	return new CPPMContext(*m_rootcontext);
}

///////////////////////////////////////////////////////////////////

// Duplicate a context on the heap. `copythis` is downcast without a runtime
// check, so it must really point at a CPPMContext.
inline CContext* CPPMLanguageModel::CloneContext(CContext *copythis)
{
	const CPPMContext &original = *static_cast<CPPMContext *>(copythis);
	return new CPPMContext(original);
}

///////////////////////////////////////////////////////////////////

// Destroy a context previously obtained from GetRootContext() or
// CloneContext(). Passing a null pointer is a no-op.
//
// Both of those functions allocate CPPMContext objects, and CloneContext()
// already downcasts its argument the same way, so the pointer is cast back to
// the concrete type before deletion. CContext's destructor is not visible
// from this header; deleting through the concrete type is well-defined
// whether or not that destructor is virtual.
inline void CPPMLanguageModel::ReleaseContext(CContext *release)
{
	delete static_cast<CPPMContext *>(release);
}

#endif /* #ifndef __PPMLanguageModel_h__ */