Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

context.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002     context.cpp  -  Definitions of Context methods
00003                              -------------------
00004     begin                : June 21 2002
00005     copyright            : (C) 2003 by Vojtìch Toman
00006     email                : vtoman@lit.cz
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00018 
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030 
00031 
00032 #include "context.h"
00033 
00034 
00035 
00037 #define MIN(a,b)                ((a) < (b) ? (a) : (b))
00038 
00039 
00040 
00049 #define INCR_SYMBOL_PROB_ACTUAL(symbol, inc)    \
00050   FreqValue _inc = (inc);                       \
00051   int p = symbol;                               \
00052   /* Increment stats */                         \
00053   while (p > 0)                                 \
00054     {                                           \
00055       tree[p] += _inc;                          \
00056       p = BACK(p);                              \
00057     }                                           \
00058   total += _inc;                                \
00059 
00060 
00061 
00067 #define INCR_SYMBOL_PROB_MPS(symbol)                    \
00068   {                                                     \
00069     if (symbol == mostFreqSymbol)                       \
00070       mostFreqCount += _inc;                            \
00071     else                                                \
00072       if ((_high) - (_low) + (_inc) > mostFreqCount)    \
00073         {                                               \
00074           mostFreqSymbol = symbol;                      \
00075           mostFreqCount = (_high) - (_low) + (_inc);    \
00076           mostFreqPos   = _low;                         \
00077         }                                               \
00078       else                                              \
00079         if (symbol < mostFreqSymbol)                    \
00080           mostFreqPos += _inc;                          \
00081   }
00082 
00083 
00084 
00085 #ifdef MOST_PROB_AT_END
00086 
00089 #define INCR_SYMBOL_PROB(symbol, low1, high1, inc1)     \
00090 {                                                       \
00091   FreqValue _low = low1;                                \
00092   FreqValue _high = high1;                              \
00093   INCR_SYMBOL_PROB_ACTUAL(symbol, inc1)                 \
00094   INCR_SYMBOL_PROB_MPS(symbol)                          \
00095 }
00096 
00097 #else
00098 
00102 #define INCR_SYMBOL_PROB(symbol, low1, high1, inc1)     \
00103 {                                                       \
00104   INCR_SYMBOL_PROB_ACTUAL(symbol, inc1)                 \
00105 }
00106 #endif
00107 
00108 
00109 
00110 
00111 
00112 
00119 #define GET_COUNT(symbol, c)                    \
00120 {                                               \
00121   if ((symbol) & 1)                             \
00122     c = tree[symbol];                           \
00123   else                                          \
00124     {                                           \
00125       int q = symbol + 1;                       \
00126       int z = MIN(FORW(symbol), maxLength + 1); \
00127       c = tree[symbol];                         \
00128       while (q < z)                             \
00129         {                                       \
00130           c -= tree[q];                         \
00131           q  = FORW(q);                         \
00132         }                                       \
00133     }                                           \
00134 }
00135 
00136 
00137 
00141 #define ADJUST_ZERO_FREQ()                      \
00142 {                                               \
00143   FreqValue diff;                               \
00144   diff = ZERO_FREQ_PROB - tree[1];              \
00145   if (diff != 0)                                \
00146     INCR_SYMBOL_PROB(1, 0, tree[1], diff);      \
00147 }
00148 
00149 
00150 
00156 #define ZERO_FREQ_PROB          ((FreqValue)nSingletons)
00157 
00158 
00159 
00160 
00164 Context::Context(void)
00165   : ContextBase()
00166 {
00167   tree = 0;
00168 }
00169 
00170 
00171 
00178 Context::Context(int size, ContextType type)
00179   : ContextBase()
00180 {
00181   tree = 0;
00182   setType(size, type);
00183 }
00184 
00185 
00186 
00190 Context::~Context(void)
00191 {
00192   if (tree)
00193     free(tree);
00194 }
00195 
00196 
00197 
00201 void Context::initZeroFreq()
00202 {
00203   /* nSingletons is now actually nSingletons + 1, but this means
00204      we can use nSingletons directly as ZERO_FREQ_PROB */
00205   if (type == DynamicContext)
00206     nSingletons += incr;
00207   else
00208     nSingletons = 0;
00209 }
00210 
00211 
00212 
00221 void Context::setType(int len, ContextType ct)
00222 {
00223   int i;
00224   int size = 1;
00225 
00226   //add one entry for EOM symbol
00227   len++;
00228   endOfMessage = length = len;
00229 
00230   //increment length to accommodate the fact 
00231   //that symbol 0 is stored at pos 2 in the array.
00232   //(Escape symbol is at pos 1, pos 0 is not used).
00233   length+=2;
00234 
00235   //round length up to next power of two
00236   while (size < length-1)
00237     size = size << 1;
00238 
00239   //malloc context structure and array for frequencies
00240   if (!(tree = (FreqValue *)malloc((size+1)*sizeof(FreqValue))))
00241     FATAL("Not enough memory to create context!");
00242 
00243   //initialBaseSize = length;   //for more sensible initial installation of symbols
00244   initialSize = size;   //save for purging later
00245   length = 1;           //current no. of symbols
00246   total = 0;            //total frequency
00247   nSymbols = 1;         //count of symbols
00248   type = ct;            //is context DynamicContext or StaticContext
00249   maxLength = size;     //no. symbols before growing
00250 
00251   mostFreqSymbol = -1;  //Initially no mostFreqSymbol
00252   mostFreqCount = 0;
00253   mostFreqPos = 0;
00254     
00255   //initialize contents of tree array to zero
00256   for (i = 0; i <= maxLength; i++)
00257     tree[i] = 0;
00258 
00259   //increment is initially 2^f
00260   incr = (FreqValue) 1 << F_BITS; 
00261   nSingletons = 0;
00262 
00263   initZeroFreq();
00264   ADJUST_ZERO_FREQ();
00265 }
00266 
00267 
00268 
00274 int Context::initialize(void)
00275 {
00276   //this is better and less "vasting"
00277   for (int i=0; i < endOfMessage; i++)
00278     installSymbol(i);
00279 //   for (int i=0; i < initialSize; i++)
00280 //     installSymbol(i);
00281 
00282   if (installSymbol(endOfMessage) == TooManySymbols)
00283     FATAL("TOO MANY SYMBOLS: Couldn't install initial symbols. "
00284           "(Perhaps F_BITS  is too small?)");
00285 
00286   initialized = true;
00287   return endOfMessage;
00288 }
00289 
00290 
00291 
00292 int Context::lastFixedSymbol(void)
00293 {
00294   return endOfMessage;
00295 }
00296 
00297 
00307 int Context::installSymbol(int symbol)
00308 {
00309   int i;
00310   FreqValue low, high;
00311 
00312   //Increment because first user symbol (symbol 0)
00313   //is stored at array position 2
00314   symbol+=2;
00315 
00316 
00317   //if the symbol is already installed, don't do anything
00318   if (symbol <= length)
00319     {
00320 //       DBG("NOT INSTALLED SYMBOL: " << symbol << " length: "<< length);
00321       return Context::Ok;
00322     }
00323 
00324   //if new symbol is greater than current array length then double length 
00325   //of array 
00326   while (symbol > maxLength) 
00327     {
00328       //DBG("MAXLENGTH = " << maxLength << " SYMBOL = " << symbol);
00329       tree = (FreqValue *)realloc(tree, (2*maxLength+1) * sizeof(FreqValue));
00330       if (!tree)
00331         {
00332           ERR("Not enough memory to expand context!");
00333           return Context::NoMemory;
00334         }
00335 
00336       //clear new part of table to zero
00337       for (i = maxLength+1; i <= 2*maxLength; i++)
00338         tree[i] = 0;
00339       
00340       maxLength <<= 1;
00341       //DBG("NEW MAXLENGTH = " << maxLength);
00342     }
00343 
00344   //check that we are not installing too many symbols
00345   if ((unsigned long)((nSymbols + 1) << 1) >= MAX_FREQUENCY)
00346     //cannot install another symbol as all frequencies will 
00347     //halve to one and an infinite loop will result
00348     return Context::TooManySymbols;
00349     
00350   if (symbol > length)  //update length if necessary
00351     length = symbol;
00352   nSymbols++;           //increment count of symbols
00353 
00354   getInterval(&low, &high, symbol);
00355 
00356   //update the number of singletons if context is DynamicContext
00357   INCR_SYMBOL_PROB(symbol, low, high, incr);
00358   if (type == DynamicContext)
00359     nSingletons += incr;
00360 
00361   ADJUST_ZERO_FREQ();
00362 
00363   //halve frequency counts if total greater than Max_frequency
00364   while (total > MAX_FREQUENCY)
00365     halveContext();
00366 
00367   return Context::Ok;
00368 }
00369 
00370 
00371 int Context::encodeEndOfMessage() throw (ExaltContextNotInitializedException, ExaltIOException)
00372 {
00373   if (!initialized)
00374     {
00375       throw ExaltContextNotInitializedException();
00376     }
00377   else
00378     return encode(endOfMessage);
00379 }
00380 
00389 int Context::encode(int symbol) throw (ExaltContextNotInitializedException, ExaltIOException)
00390 {
00391   FreqValue low, high, low_w, high_w;
00392 
00393   if (!initialized)
00394     {
00395       throw ExaltContextNotInitializedException();
00396     }
00397 
00398   symbol+=2;
00399   if ((symbol > 0) && (symbol <= maxLength))
00400     {
00401       if (mostFreqSymbol == symbol)
00402         {
00403           low  = mostFreqPos;
00404           high = low + mostFreqCount;
00405         }
00406       else
00407         getInterval(&low, &high, symbol);
00408     }
00409   else
00410     low = high = 0;
00411     
00412   if (low == high)
00413     {
00414       if (ZERO_FREQ_PROB == 0)
00415         FATAL("Cannot code zero-probability novel symbol!");
00416 
00417       //encode the escape symbol if unknown symbol
00418       symbol = 1;
00419       if (mostFreqSymbol == 1)
00420         {
00421           low = mostFreqPos;
00422           high = low + mostFreqCount;
00423         }
00424       else
00425         getInterval(&low, &high, symbol);
00426     }
00427 
00428   //Shift high and low if required so that most probable symbol
00429   //is at the end of the range
00430   //(Shifted values are low_w, and high_w, as original values
00431   //are needed when updating the stats)
00432 
00433 #ifdef MOST_PROB_AT_END
00434   if (symbol > mostFreqSymbol)
00435     {
00436       low_w  = low  - mostFreqCount;
00437       high_w = high - mostFreqCount;
00438     }
00439   else
00440     if (symbol == mostFreqSymbol)
00441       {
00442         low_w  = total - mostFreqCount;
00443         high_w = total;
00444       }
00445     else
00446       {
00447         low_w  = low;
00448         high_w = high;
00449       }
00450 #else
00451   low_w  = low;
00452   high_w = high;
00453 #endif
00454 
00455   //call the coder with the low, high and total for this symbol
00456   //(with low_w, high_w:  Most probable symbol moved to end of range)
00457 
00458   arithCodec->arithmeticEncode(low_w, high_w, total);
00459 
00460   if (symbol != 1)      //If not the special ESC / NOT_KNOWN symbol
00461     {
00462       //update the singleton count if symbol was previously a singleton
00463       if (type == DynamicContext && high-low == incr)
00464         nSingletons -= incr;
00465 
00466       //increment the symbol's frequency count
00467       INCR_SYMBOL_PROB(symbol, low, high, incr);
00468     }
00469 
00470   ADJUST_ZERO_FREQ();
00471 
00472   while (total > MAX_FREQUENCY)
00473     halveContext();
00474 
00475   if (symbol == 1)
00476     return Context::NotKnown;
00477 
00478   return Context::Ok;
00479 }
00480 
00481 
00482 
00483 
00489 int Context::decode(void) throw (ExaltContextNotInitializedException, ExaltIOException)
00490 {
00491   int symbol;
00492   int p, m, e;
00493   FreqValue low, high, target;
00494   int n = maxLength;
00495 
00496 
00497   if (!initialized)
00498     {
00499       throw ExaltContextNotInitializedException();
00500     }
00501  
00502   target = arithCodec->arithmeticDecodeTarget(total);
00503 
00504 #ifdef MOST_PROB_AT_END
00505   //Check if most probable symbol (shortcut decode)
00506   if (target >= total - mostFreqCount)
00507     {
00508       arithCodec->arithmeticDecode( total - mostFreqCount, total, total);
00509       symbol = mostFreqSymbol;
00510       low    = mostFreqPos;
00511       high   = low + mostFreqCount;
00512 
00513       INCR_SYMBOL_PROB(symbol, low, high, incr);
00514       
00515       if (symbol != 1) 
00516         if (type == DynamicContext && high-low == incr)
00517           nSingletons -= incr;
00518     }
00519   else
00520     //Not MPS, have to decode slowly
00521     {
00522       if (target >= mostFreqPos)
00523         target += mostFreqCount;
00524 #endif
00525       p = 1; low = 0;
00526       while ( ((p << 1) <= maxLength ) && (tree[p] <= target))
00527         {
00528           target -= tree[p];
00529           low    += tree[p];
00530           p      <<= 1;
00531         }
00532 
00533       symbol = p;
00534       m = p >> 1;
00535       e = 0;
00536 
00537       while (m >= 1)
00538         {
00539           if (symbol + m <= n)
00540             {
00541               e += tree[symbol + m];
00542               if (tree[symbol] - e <= target) 
00543                 {
00544                   target    -= tree[symbol] - e;
00545                   low       += tree[symbol] - e;
00546                   if (symbol != 1) tree[symbol] += incr;
00547                   symbol    += m;
00548                   e         =  0;
00549                 }
00550             }
00551           m >>= 1;
00552         }
00553       if (symbol!= 1) tree[symbol] += incr;
00554 
00555       if (symbol & 1)
00556         high = low + tree[symbol];
00557       else {
00558         GET_COUNT(symbol, high);
00559         high += low;
00560       }
00561       if (symbol != 1) high -= incr;
00562 
00563 #ifdef MOST_PROB_AT_END
00564       if (low >= mostFreqPos)
00565         //Ie: Was moved
00566         arithCodec->arithmeticDecode(low  - mostFreqCount,
00567                                      high - mostFreqCount,
00568                                      total);
00569       else
00570 #endif
00571         arithCodec->arithmeticDecode(low, high, total);
00572 
00573       //update the singleton count if symbol was previously a singleton
00574       if (symbol != 1)
00575         {
00576           if (type == DynamicContext && high-low == incr)
00577             nSingletons -= incr;
00578 
00579           total += incr;
00580 
00581           if (symbol == mostFreqSymbol)
00582             mostFreqCount += incr;
00583           else
00584             if (high-low+incr > mostFreqCount)
00585               { 
00586                 mostFreqSymbol = symbol;
00587                 mostFreqCount  = high - low + incr;
00588                 mostFreqPos    = low;
00589               }
00590             else
00591               if (symbol < mostFreqSymbol)
00592                 mostFreqPos += incr;
00593         }
00594       
00595 #ifdef MOST_PROB_AT_END
00596     }  //If not MPS
00597 #endif
00598 
00599   ADJUST_ZERO_FREQ();
00600 
00601   //halve all frequencies if necessary
00602   while (total > MAX_FREQUENCY)
00603     halveContext();
00604 
00605   if (symbol == 1)
00606     return Context::NotKnown;
00607 
00608   return symbol - 2;
00609 }
00610 
00611 
00612 
00620 void Context::getInterval(FreqValue *pLow, FreqValue *pHigh, int symbol)
00621 {
00622   FreqValue low, count;
00623   int p, q;
00624 
00625   //go too far
00626   for (p = 1, low = 0; p < symbol; )
00627     {
00628       low += tree[p], p <<= 1;
00629     }
00630 
00631   //subtract off the extra freqs from low
00632   q = symbol;
00633   while ((q != p) && (q <= maxLength))
00634     {
00635       low -= tree[q], q = FORW(q);
00636     }
00637 
00638   GET_COUNT(symbol, count);
00639   
00640   *pLow = low;
00641   *pHigh = low + count;
00642 }
00643 
00644  
00645 
00649 void Context::halveContext(void)
00650 {
00651   int  shifts, p, symbol;
00652 
00653   //halve increment
00654   incr = (incr + MIN_INCR) >> 1;
00655   if (incr < MIN_INCR) 
00656     incr = MIN_INCR;
00657   nSingletons = incr;
00658     
00659   //Convert Moffat tree to array of freqs
00660   for (shifts=0 , p = maxLength ; p > 1 ; shifts++ ) p >>= 1;
00661   p  = 1 << shifts;      //p is now to 2^floor(log_2 pContext->max_length)
00662   while (p > 1)
00663     {
00664       symbol = p;
00665       while (symbol + (p >> 1) <= maxLength )
00666         {
00667           tree[symbol] -= tree[symbol + (p >> 1)];
00668           symbol += p;
00669         }
00670       p >>= 1;
00671     }
00672 
00673 
00674   //Halve the counts (ignore tree[1] as it will be changed soon)
00675   total = 0;
00676   for (p = 2; p <= maxLength; p++) 
00677     {
00678       tree[p] = (tree[p] + 1) >> 1;
00679       total += tree[p];
00680       if (tree[p] == incr)
00681         nSingletons += incr;
00682     }
00683 
00684 
00685   //Convert array of freqs to Moffat tree
00686   for (p = 2; p <= maxLength; )
00687     {
00688       symbol = p;
00689       while (symbol + (p >> 1) <= maxLength )
00690         {
00691           tree[symbol] += tree[symbol + (p >> 1)];
00692           symbol += p;
00693         }
00694       p <<= 1;
00695     }
00696 
00697   if (type == StaticContext)
00698     nSingletons = 0;
00699 
00700   tree[1] = ZERO_FREQ_PROB;
00701   total += ZERO_FREQ_PROB;
00702 
00703   //Recalc new mostFreqSymbol info if it exists
00704   //(since roundoff may mean not exactly half of previous value)
00705 
00706   if (mostFreqSymbol != -1) 
00707     {
00708       FreqValue low, high;
00709 
00710       getInterval(&low, &high, mostFreqSymbol);
00711       mostFreqCount = high-low;
00712       mostFreqPos = low;
00713     }
00714 
00715   ADJUST_ZERO_FREQ();
00716 }
00717 
00718 
00722 void Context::purgeContext(void)
00723 {
00724   int i;
00725 
00726   if (tree)
00727     free(tree);
00728     
00729   //malloc new tree of original size
00730   if (!(tree = (FreqValue *)malloc((initialSize + 1) * sizeof(FreqValue))))
00731     FATAL("Not enough memory to create context!");
00732 
00733   length = 1;
00734   total = 0;
00735   nSymbols = 1;         
00736 
00737   mostFreqSymbol = -1;  //Indicates no such symbol
00738   mostFreqCount = 0;
00739   mostFreqPos = 0;
00740 
00741   maxLength = initialSize;
00742   for (i = 0; i <= initialSize; i++)
00743     tree[i] = 0;
00744 
00745   //increment is initially 2 ^ f
00746   incr = (FreqValue) 1 << F_BITS;
00747   nSingletons = 0;
00748 
00749   initZeroFreq();
00750   ADJUST_ZERO_FREQ();
00751 }
00752 

Generated on Wed Feb 5 10:43:01 2003 for Exalt by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002