summaryrefslogtreecommitdiff
path: root/inputmethods/dasher/PPMLanguageModel.cpp
Unidiff
Diffstat (limited to 'inputmethods/dasher/PPMLanguageModel.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--inputmethods/dasher/PPMLanguageModel.cpp8
1 files changed, 4 insertions, 4 deletions
diff --git a/inputmethods/dasher/PPMLanguageModel.cpp b/inputmethods/dasher/PPMLanguageModel.cpp
index b725a2b..137b07f 100644
--- a/inputmethods/dasher/PPMLanguageModel.cpp
+++ b/inputmethods/dasher/PPMLanguageModel.cpp
@@ -1,309 +1,309 @@
1// PPMLanguageModel.h 1// PPMLanguageModel.h
2// 2//
3///////////////////////////////////////////////////////////////////////////// 3/////////////////////////////////////////////////////////////////////////////
4// 4//
5// Copyright (c) 1999-2002 David Ward 5// Copyright (c) 1999-2002 David Ward
6// 6//
7///////////////////////////////////////////////////////////////////////////// 7/////////////////////////////////////////////////////////////////////////////
8 8
9#include <math.h> 9#include <math.h>
10#include "PPMLanguageModel.h" 10#include "PPMLanguageModel.h"
11 11
12using namespace Dasher; 12using namespace Dasher;
13using namespace std; 13using namespace std;
14 14
15// static TCHAR debug[256]; 15// static TCHAR debug[256];
16typedef unsigned long ulong; 16typedef unsigned long ulong;
17 17
18//////////////////////////////////////////////////////////////////////// 18////////////////////////////////////////////////////////////////////////
19/// PPMnode definitions 19/// PPMnode definitions
20//////////////////////////////////////////////////////////////////////// 20////////////////////////////////////////////////////////////////////////
21 21
22CPPMLanguageModel::CPPMnode *CPPMLanguageModel::CPPMnode::find_symbol(int sym) 22CPPMLanguageModel::CPPMnode *CPPMLanguageModel::CPPMnode::find_symbol(int sym)
23// see if symbol is a child of node 23// see if symbol is a child of node
24{ 24{
25 // printf("finding symbol %d at node %d\n",sym,node->id); 25 // printf("finding symbol %d at node %d\n",sym,node->id);
26 CPPMnode *found=child; 26 CPPMnode *found=child;
27 while (found) { 27 while (found) {
28 if (found->symbol==sym) 28 if (found->symbol==sym)
29 return found; 29 return found;
30 found=found->next; 30 found=found->next;
31 } 31 }
32 return 0; 32 return 0;
33} 33}
34 34
35 35
36CPPMLanguageModel::CPPMnode * CPPMLanguageModel::CPPMnode::add_symbol_to_node(int sym,int *update) 36CPPMLanguageModel::CPPMnode * CPPMLanguageModel::CPPMnode::add_symbol_to_node(int sym,int *update)
37{ 37{
38 CPPMnode *born,*search; 38 CPPMnode *born,*search;
39 search=find_symbol(sym); 39 search=find_symbol(sym);
40 if (!search) { 40 if (!search) {
41 born = new CPPMnode(sym); 41 born = new CPPMnode(sym);
42 born->next=child; 42 born->next=child;
43 child=born; 43 child=born;
44 // node->count=1; 44 // node->count=1;
45 return born; 45 return born;
46 } else { 46 } else {
47 if (*update) { // perform update exclusions 47 if (*update) { // perform update exclusions
48 search->count++; 48 search->count++;
49 *update=0; 49 *update=0;
50 } 50 }
51 return search; 51 return search;
52 } 52 }
53 53
54} 54}
55 55
56 56
57///////////////////////////////////////////////////////////////////// 57/////////////////////////////////////////////////////////////////////
58// CPPMLanguageModel defs 58// CPPMLanguageModel defs
59///////////////////////////////////////////////////////////////////// 59/////////////////////////////////////////////////////////////////////
60 60
61CPPMLanguageModel::CPPMLanguageModel(CAlphabet *_alphabet,int _normalization) 61CPPMLanguageModel::CPPMLanguageModel(CAlphabet *_alphabet,int _normalization)
62 : CLanguageModel(_alphabet,_normalization) 62 : CLanguageModel(_alphabet,_normalization)
63{ 63{
64 root=new CPPMnode(-1); 64 root=new CPPMnode(-1);
65 m_rootcontext=new CPPMContext(root,0); 65 m_rootcontext=new CPPMContext(root,0);
66} 66}
67 67
68 68
69CPPMLanguageModel::~CPPMLanguageModel() 69CPPMLanguageModel::~CPPMLanguageModel()
70{ 70{
71 delete root; 71 delete root;
72} 72}
73 73
74 74
75bool CPPMLanguageModel::GetProbs(CContext *context,vector<unsigned int> &probs,double addprob) 75bool CPPMLanguageModel::GetProbs(CContext *context,vector<unsigned int> &probs,double )
76 // get the probability distribution at the context 76 // get the probability distribution at the context
77{ 77{
78 // seems like we have to have this hack for VC++ 78 // seems like we have to have this hack for VC++
79 CPPMContext *ppmcontext=static_cast<CPPMContext *> (context); 79 CPPMContext *ppmcontext=static_cast<CPPMContext *> (context);
80 80
81 81
82 int modelchars=GetNumberModelChars(); 82 int modelchars=GetNumberModelChars();
83 int norm=CLanguageModel::normalization(); 83 int norm=CLanguageModel::normalization();
84 probs.resize(modelchars); 84 probs.resize(modelchars);
85 CPPMnode *temp,*s; 85 CPPMnode *temp,*s;
86 int loop,total; 86 int loop,total;
87 int sym; 87 int sym;
88 ulong spent=0; 88 // ulong spent=0;
89 ulong size_of_slice; 89 ulong size_of_slice;
90 bool *exclusions=new bool [modelchars]; 90 bool *exclusions=new bool [modelchars];
91 ulong uniform=modelchars; 91 ulong uniform=modelchars;
92 ulong tospend=norm-uniform; 92 ulong tospend=norm-uniform;
93 temp=ppmcontext->head; 93 temp=ppmcontext->head;
94 for (loop=0; loop <modelchars; loop++) { /* set up the exclusions array */ 94 for (loop=0; loop <modelchars; loop++) { /* set up the exclusions array */
95 probs[loop]=0; 95 probs[loop]=0;
96 exclusions[loop]=0; 96 exclusions[loop]=0;
97 } 97 }
98 while (temp!=0) { 98 while (temp!=0) {
99 //Usprintf(debug,TEXT("tospend %u\n"),tospend); 99 //Usprintf(debug,TEXT("tospend %u\n"),tospend);
100 //DebugOutput(TEXT("round\n")); 100 //DebugOutput(TEXT("round\n"));
101 total=0; 101 total=0;
102 s=temp->child; 102 s=temp->child;
103 while (s) { 103 while (s) {
104 sym=s->symbol; 104 sym=s->symbol;
105 if (!exclusions[s->symbol]) 105 if (!exclusions[s->symbol])
106 total=total+s->count; 106 total=total+s->count;
107 s=s->next; 107 s=s->next;
108 } 108 }
109 if (total) { 109 if (total) {
110 //Usprintf(debug,TEXT"escape %u\n"),tospend* 110 //Usprintf(debug,TEXT"escape %u\n"),tospend*
111 size_of_slice=tospend; 111 size_of_slice=tospend;
112 s=temp->child; 112 s=temp->child;
113 while (s) { 113 while (s) {
114 if (!exclusions[s->symbol]) { 114 if (!exclusions[s->symbol]) {
115 exclusions[s->symbol]=1; 115 exclusions[s->symbol]=1;
116 ulong p=size_of_slice*(2*s->count-1)/2/ulong(total); 116 ulong p=size_of_slice*(2*s->count-1)/2/ulong(total);
117 probs[s->symbol]+=p; 117 probs[s->symbol]+=p;
118 tospend-=p; 118 tospend-=p;
119 } 119 }
120 // Usprintf(debug,TEXT("sym %u counts %d p %u tospend %u \n"),sym,s->count,p,tospend); 120 // Usprintf(debug,TEXT("sym %u counts %d p %u tospend %u \n"),sym,s->count,p,tospend);
121 // DebugOutput(debug); 121 // DebugOutput(debug);
122 s=s->next; 122 s=s->next;
123 } 123 }
124 } 124 }
125 temp = temp->vine; 125 temp = temp->vine;
126 } 126 }
127 //Usprintf(debug,TEXT("Norm %u tospend %u\n"),Norm,tospend); 127 //Usprintf(debug,TEXT("Norm %u tospend %u\n"),Norm,tospend);
128 //DebugOutput(debug); 128 //DebugOutput(debug);
129 129
130 size_of_slice=tospend; 130 size_of_slice=tospend;
131 int symbolsleft=0; 131 int symbolsleft=0;
132 for (sym=1;sym<modelchars;sym++) 132 for (sym=1;sym<modelchars;sym++)
133 if (!probs[sym]) 133 if (!probs[sym])
134 symbolsleft++; 134 symbolsleft++;
135 for (sym=1;sym<modelchars;sym++) 135 for (sym=1;sym<modelchars;sym++)
136 if (!probs[sym]) { 136 if (!probs[sym]) {
137 ulong p=size_of_slice/symbolsleft; 137 ulong p=size_of_slice/symbolsleft;
138 probs[sym]+=p; 138 probs[sym]+=p;
139 tospend-=p; 139 tospend-=p;
140 } 140 }
141 141
142 // distribute what's left evenly 142 // distribute what's left evenly
143 tospend+=uniform; 143 tospend+=uniform;
144 for (sym=1;sym<modelchars;sym++) { 144 for (sym=1;sym<modelchars;sym++) {
145 ulong p=tospend/(modelchars-sym); 145 ulong p=tospend/(modelchars-sym);
146 probs[sym]+=p; 146 probs[sym]+=p;
147 tospend-=p; 147 tospend-=p;
148 } 148 }
149 //Usprintf(debug,TEXT("finaltospend %u\n"),tospend); 149 //Usprintf(debug,TEXT("finaltospend %u\n"),tospend);
150 //DebugOutput(debug); 150 //DebugOutput(debug);
151 151
152 // free(exclusions); // !!! 152 // free(exclusions); // !!!
153 // !!! NB by IAM: p577 Stroustrup 3rd Edition: "Allocating an object using new and deleting it using free() is asking for trouble" 153 // !!! NB by IAM: p577 Stroustrup 3rd Edition: "Allocating an object using new and deleting it using free() is asking for trouble"
154 delete[] exclusions; 154 delete[] exclusions;
155 return true; 155 return true;
156} 156}
157 157
158 158
159void CPPMLanguageModel::AddSymbol(CPPMLanguageModel::CPPMContext &context,int symbol) 159void CPPMLanguageModel::AddSymbol(CPPMLanguageModel::CPPMContext &context,int symbol)
160 // add symbol to the context 160 // add symbol to the context
161 // creates new nodes, updates counts 161 // creates new nodes, updates counts
162 // and leaves 'context' at the new context 162 // and leaves 'context' at the new context
163{ 163{
164 // sanity check 164 // sanity check
165 if (symbol==0 || symbol>=GetNumberModelChars()) 165 if (symbol==0 || symbol>=GetNumberModelChars())
166 return; 166 return;
167 167
168 CPPMnode *vineptr,*temp; 168 CPPMnode *vineptr,*temp;
169 int updatecnt=1; 169 int updatecnt=1;
170 170
171 temp=context.head->vine; 171 temp=context.head->vine;
172 context.head=context.head->add_symbol_to_node(symbol,&updatecnt); 172 context.head=context.head->add_symbol_to_node(symbol,&updatecnt);
173 vineptr=context.head; 173 vineptr=context.head;
174 context.order++; 174 context.order++;
175 175
176 while (temp!=0) { 176 while (temp!=0) {
177 vineptr->vine=temp->add_symbol_to_node(symbol,&updatecnt); 177 vineptr->vine=temp->add_symbol_to_node(symbol,&updatecnt);
178 vineptr=vineptr->vine; 178 vineptr=vineptr->vine;
179 temp=temp->vine; 179 temp=temp->vine;
180 } 180 }
181 vineptr->vine=root; 181 vineptr->vine=root;
182 if (context.order>MAX_ORDER){ 182 if (context.order>MAX_ORDER){
183 context.head=context.head->vine; 183 context.head=context.head->vine;
184 context.order--; 184 context.order--;
185 } 185 }
186} 186}
187 187
188 188
189// update context with symbol 'Symbol' 189// update context with symbol 'Symbol'
190void CPPMLanguageModel::EnterSymbol(CContext* Context, modelchar Symbol) 190void CPPMLanguageModel::EnterSymbol(CContext* Context, modelchar Symbol)
191{ 191{
192 CPPMLanguageModel::CPPMContext& context = * static_cast<CPPMContext *> (Context); 192 CPPMLanguageModel::CPPMContext& context = * static_cast<CPPMContext *> (Context);
193 193
194 CPPMnode *find; 194 CPPMnode *find;
195 CPPMnode *temp=context.head; 195 // CPPMnode *temp=context.head;
196 196
197 while (context.head) { 197 while (context.head) {
198 find =context.head->find_symbol(Symbol); 198 find =context.head->find_symbol(Symbol);
199 if (find) { 199 if (find) {
200 context.order++; 200 context.order++;
201 context.head=find; 201 context.head=find;
202 //Usprintf(debug,TEXT("found context %x order %d\n"),head,order); 202 //Usprintf(debug,TEXT("found context %x order %d\n"),head,order);
203 //DebugOutput(debug); 203 //DebugOutput(debug);
204 return; 204 return;
205 } 205 }
206 context.order--; 206 context.order--;
207 context.head=context.head->vine; 207 context.head=context.head->vine;
208 } 208 }
209 209
210 if (context.head==0) { 210 if (context.head==0) {
211 context.head=root; 211 context.head=root;
212 context.order=0; 212 context.order=0;
213 } 213 }
214 214
215} 215}
216 216
217 217
218void CPPMLanguageModel::LearnSymbol(CContext* Context, modelchar Symbol) 218void CPPMLanguageModel::LearnSymbol(CContext* Context, modelchar Symbol)
219{ 219{
220 CPPMLanguageModel::CPPMContext& context = * static_cast<CPPMContext *> (Context); 220 CPPMLanguageModel::CPPMContext& context = * static_cast<CPPMContext *> (Context);
221 AddSymbol(context, Symbol); 221 AddSymbol(context, Symbol);
222} 222}
223 223
224 224
225void CPPMLanguageModel::dumpSymbol(int symbol) 225void CPPMLanguageModel::dumpSymbol(int symbol)
226{ 226{
227 if ((symbol <= 32) || (symbol >= 127)) 227 if ((symbol <= 32) || (symbol >= 127))
228 printf( "<%d>", symbol ); 228 printf( "<%d>", symbol );
229 else 229 else
230 printf( "%c", symbol ); 230 printf( "%c", symbol );
231} 231}
232 232
233 233
234void CPPMLanguageModel::dumpString( char *str, int pos, int len ) 234void CPPMLanguageModel::dumpString( char *str, int pos, int len )
235 // Dump the string STR starting at position POS 235 // Dump the string STR starting at position POS
236{ 236{
237 char cc; 237 char cc;
238 int p; 238 int p;
239 for (p = pos; p<pos+len; p++) { 239 for (p = pos; p<pos+len; p++) {
240 cc = str [p]; 240 cc = str [p];
241 if ((cc <= 31) || (cc >= 127)) 241 if ((cc <= 31) || (cc >= 127))
242 printf( "<%d>", cc ); 242 printf( "<%d>", cc );
243 else 243 else
244 printf( "%c", cc ); 244 printf( "%c", cc );
245 } 245 }
246} 246}
247 247
248 248
249void CPPMLanguageModel::dumpTrie( CPPMLanguageModel::CPPMnode *t, int d ) 249void CPPMLanguageModel::dumpTrie( CPPMLanguageModel::CPPMnode *, int )
250 // diagnostic display of the PPM trie from node t and deeper 250 // diagnostic display of the PPM trie from node t and deeper
251{ 251{
252//TODO 252//TODO
253/* 253/*
254 dchar debug[256]; 254 dchar debug[256];
255 int sym; 255 int sym;
256 CPPMnode *s; 256 CPPMnode *s;
257 Usprintf( debug,TEXT("%5d %7x "), d, t ); 257 Usprintf( debug,TEXT("%5d %7x "), d, t );
258 //TODO: Uncomment this when headers sort out 258 //TODO: Uncomment this when headers sort out
259 //DebugOutput(debug); 259 //DebugOutput(debug);
260 if (t < 0) // pointer to input 260 if (t < 0) // pointer to input
261 printf( " <" ); 261 printf( " <" );
262 else { 262 else {
263 Usprintf(debug,TEXT( " %3d %5d %7x %7x %7x <"), t->symbol,t->count, t->vine, t->child, t->next ); 263 Usprintf(debug,TEXT( " %3d %5d %7x %7x %7x <"), t->symbol,t->count, t->vine, t->child, t->next );
264 //TODO: Uncomment this when headers sort out 264 //TODO: Uncomment this when headers sort out
265 //DebugOutput(debug); 265 //DebugOutput(debug);
266 } 266 }
267 267
268 dumpString( dumpTrieStr, 0, d ); 268 dumpString( dumpTrieStr, 0, d );
269 Usprintf( debug,TEXT(">\n") ); 269 Usprintf( debug,TEXT(">\n") );
270 //TODO: Uncomment this when headers sort out 270 //TODO: Uncomment this when headers sort out
271 //DebugOutput(debug); 271 //DebugOutput(debug);
272 if (t != 0) { 272 if (t != 0) {
273 s = t->child; 273 s = t->child;
274 while (s != 0) { 274 while (s != 0) {
275 sym =s->symbol; 275 sym =s->symbol;
276 276
277 dumpTrieStr [d] = sym; 277 dumpTrieStr [d] = sym;
278 dumpTrie( s, d+1 ); 278 dumpTrie( s, d+1 );
279 s = s->next; 279 s = s->next;
280 } 280 }
281 } 281 }
282*/ 282*/
283} 283}
284 284
285 285
286void CPPMLanguageModel::dump() 286void CPPMLanguageModel::dump()
287 // diagnostic display of the whole PPM trie 287 // diagnostic display of the whole PPM trie
288{ 288{
289// TODO: 289// TODO:
290/* 290/*
291 dchar debug[256]; 291 dchar debug[256];
292 Usprintf(debug,TEXT( "Dump of Trie : \n" )); 292 Usprintf(debug,TEXT( "Dump of Trie : \n" ));
293 //TODO: Uncomment this when headers sort out 293 //TODO: Uncomment this when headers sort out
294 //DebugOutput(debug); 294 //DebugOutput(debug);
295 Usprintf(debug,TEXT( "---------------\n" )); 295 Usprintf(debug,TEXT( "---------------\n" ));
296 //TODO: Uncomment this when headers sort out 296 //TODO: Uncomment this when headers sort out
297 //DebugOutput(debug); 297 //DebugOutput(debug);
298 Usprintf( debug,TEXT( "depth node symbol count vine child next context\n") ); 298 Usprintf( debug,TEXT( "depth node symbol count vine child next context\n") );
299 //TODO: Uncomment this when headers sort out 299 //TODO: Uncomment this when headers sort out
300 //DebugOutput(debug); 300 //DebugOutput(debug);
301 dumpTrie( root, 0 ); 301 dumpTrie( root, 0 );
302 Usprintf( debug,TEXT( "---------------\n" )); 302 Usprintf( debug,TEXT( "---------------\n" ));
303 //TODO: Uncomment this when headers sort out 303 //TODO: Uncomment this when headers sort out
304 //DebugOutput(debug); 304 //DebugOutput(debug);
305 Usprintf(debug,TEXT( "\n" )); 305 Usprintf(debug,TEXT( "\n" ));
306 //TODO: Uncomment this when headers sort out 306 //TODO: Uncomment this when headers sort out
307 //DebugOutput(debug); 307 //DebugOutput(debug);
308*/ 308*/
309} 309}