Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

kygrammar.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002     kygrammar.cpp  -  Definitions of class KYGrammar methods.
00003                              -------------------
00004     begin                : June 21 2002
00005     copyright            : (C) 2003 by Vojtìch Toman
00006     email                : vtoman@lit.cz
00007 ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00026 #ifdef __GNUG__
00027 # pragma implementation
00028 #endif
00029 
00030 
00031 #include "kygrammar.h"
00032 
00033 
00034 
00040 #define DECREASE_INPUT_QUEUE_LENGTH(_amount_)           inputQueueLength -= _amount_
00041 
00042 
00050 #define INCREASE_INPUT_QUEUE_LENGTH(_amount_)   \
00051 {                                               \
00052   inputQueueLength += _amount_;                 \
00053   if (inputQueueLength > maxInputQueueLength)   \
00054     maxInputQueueLength = inputQueueLength;     \
00055 }
00056 
00057 
00067 #define TESTEDRULES_POP(_val_, _stack_, _tmpStack_)     \
00068 {                                                       \
00069   _val_ = _stack_->rel;                                 \
00070   _tmpStack_ = _stack_;                                 \
00071   _stack_ = _stack_->next;                              \
00072   DELETE(_tmpStack_);                                   \
00073 }
00074   
00075 
00085 #define TESTEDRULES_PUSH(_val_, _stack_, _tmpStack_)    \
00086 {                                                       \
00087   NEW(_tmpStack_, TestedElement);                       \
00088   _tmpStack_->rel = _val_;                              \
00089   _tmpStack_->next = _stack_;                           \
00090   _stack_ = _tmpStack_;                                 \
00091 }
00092   
00093 
00099 #define DIGRAM_REMOVE(_el1_, _el2_)                                             \
00100 {                                                                               \
00101   if (_el1_->type == Terminal)                                                  \
00102     {nd = terminalDigrams->find(_el1_->value);}                                 \
00103   else                                                                          \
00104     {nd = variableDigrams->find(_el1_->rule->id);}                              \
00105                                                                                 \
00106   if (nd)                                                                       \
00107     for (r = nd->neighbours.first(); r; r = nd->neighbours.next())              \
00108       if (r->origin == _el2_)                                                   \
00109         {                                                                       \
00110           nd->neighbours.remove();                                              \
00111           DELETE(r);                                                            \
00112           break;                                                                \
00113         }                                                                       \
00114 }
00115 
00116 
00122 #define DIGRAM_ADD(_el1_, _el2_, _rulePtr_)                                     \
00123 {                                                                               \
00124   bool _needInsert_ = true;                                                     \
00125   if (_el1_->type == Terminal)                                                  \
00126     {                                                                           \
00127       if (!(nd = terminalDigrams->find(_el1_->value)))                          \
00128         {                                                                       \
00129           NEW(nd, NeighboursDescription);                                       \
00130           terminalDigrams->insert(_el1_->value, nd);                            \
00131         }                                                                       \
00132     }                                                                           \
00133   else                                                                          \
00134     {                                                                           \
00135       if (!(nd = variableDigrams->find(_el1_->rule->id)))                       \
00136         {                                                                       \
00137           NEW(nd, NeighboursDescription);                                       \
00138           variableDigrams->insert(_el1_->rule->id, nd);                         \
00139         }                                                                       \
00140     }                                                                           \
00141                                                                                 \
00142   for (r = nd->neighbours.first(); r; r = nd->neighbours.next())                \
00143     if (r->origin->type == _el2_->type && r->origin->rule == _el2_->rule)       \
00144       {                                                                         \
00145         _needInsert_ = false;                                                   \
00146         break;                                                                  \
00147       }                                                                         \
00148                                                                                 \
00149  if (_needInsert_)                                                              \
00150    {                                                                            \
00151      NEW(r, RuleElementNeighbour);                                              \
00152      r->origin = _el2_;                                                         \
00153      r->rule = _rulePtr_;                                                       \
00154      nd->neighbours.append(r);                                                  \
00155    }                                                                            \
00156 }
00157 
00158 
00159 
00165 #define DIGRAM_CHANGE(_el1_, _el2_, _rulePtr_)                                  \
00166 {                                                                               \
00167   if (_el1_->type == Terminal)                                                  \
00168     nd = terminalDigrams->find(_el1_->value);                                   \
00169   else                                                                          \
00170     nd = variableDigrams->find(_el1_->rule->id);                                \
00171                                                                                 \
00172   if (nd)                                                                       \
00173     for (r = nd->neighbours.first(); r; r = nd->neighbours.next())              \
00174       if (r->origin == _el2_)                                                   \
00175         {                                                                       \
00176           r->rule = _rulePtr_;                                                  \
00177           break;                                                                \
00178         }                                                                       \
00179 }
00180 
00181 
00187 #define RIGHT_SIDE_APPEND(_rule_, _el_) \
00188 if (!_rule_->body)                      \
00189 {                                       \
00190   _rule_->body = _el_;                  \
00191   _rule_->last = _el_;                  \
00192   _el_->next = 0;                       \
00193   _el_->prev = 0;                       \
00194 }                                       \
00195 else                                    \
00196 {                                       \
00197   _rule_->last->next = _el_;            \
00198   _el_->prev = _rule_->last;            \
00199   _el_->next = 0;                       \
00200   _rule_->last = _el_;                  \
00201 }
00202 
00203 
00209 #define RIGHT_SIDE_IS_EMPTY(_rule_)     (_rule_->last == 0)
00210 
00211 
00220 #define REPRESENTED_BY_APPEND(_list_, _rule_)                   \
00221 {                                                               \
00222   bool _needInsert_ = true;                                     \
00223                                                                 \
00224   for (Rule *_rr_ = _list_.first(); _rr_; _rr_ = _list_.next()) \
00225     {                                                           \
00226       if (_rr_ == _rule_)                                       \
00227         {                                                       \
00228           _needInsert_ = false;                                 \
00229           break;                                                \
00230         }                                                       \
00231     }                                                           \
00232   if (_needInsert_)                                             \
00233     _list_.append(_rule_);                                      \
00234 }
00235 
00236 
00237 
00241 KYGrammar::KYGrammar(void)
00242   : GrammarBase()
00243 {
00244   initKYGrammar();
00245 }
00246 
00247 
00248 
00252 void KYGrammar::initKYGrammar(void)
00253 {
00254   size = 0;
00255   newSymbolToInstall = 0;
00256   inputQueueLength = maxInputQueueLength = 0;
00257 
00258   totalTime = totalSymbols = 0;
00259   variablesEncoded = 0;
00260   terminalsEncoded = 0;
00261 
00262   eatDataPeriodicity = KY_GRAMMAR_EATDATA_PERIODICITY;
00263 
00264   NEW(flushStack, RuleElement*[KY_GRAMMAR_FLUSH_STACK_SIZE]);
00265   flushStackPos = -1;
00266 
00267   NEW(ruleStack, Rule*[KY_GRAMMAR_RULE_STACK_SIZE]);
00268   ruleStackPos = -1;
00269 
00270   outputDevice = 0;
00271 
00272   context = 0;
00273   useContextForOutput = false;
00274 
00275   NEW(ruleSet, RuleSet);
00276 
00277   ruleCounter = 0;
00278 
00279   //no input
00280   inputFirst = inputLast = 0;
00281   NEW(testedRules, TestedRules);
00282 
00283   NEW(availableRuleNumbers, Queue<RuleId>);
00284 
00285   availableRuleNumbers->setAutoDelete(true);
00286 
00287   runCount = 1;
00288 
00289   rootRule = 0;
00290   createRootRule();
00291 
00292   NEW(terminalDigrams, TerminalDigrams);
00293   terminalDigrams->setAutoDelete(true);
00294  
00295   NEW(variableDigrams, VariableDigrams);
00296   variableDigrams->setAutoDelete(true);
00297 
00298   //initially, I = false (set of variables is unchanged)
00299   I = false;
00300 }
00301 
00302 
00303 
00307 KYGrammar::~KYGrammar(void)
00308 {
00309   //force processing of the unprocessed data
00310   flush();
00311 
00312   DELETE_ARRAY(flushStack);
00313   DELETE_ARRAY(ruleStack);
00314 
00315   if (ExaltOptions::getOption(ExaltOptions::PrintGrammar) == ExaltOptions::Yes)
00316     print();
00317 
00318   if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00319     {
00320       OUTPUTENL("KY grammar statistics");
00321       OUTPUTE("  Total symbols processed: \t\t");
00322       if (useContextForOutput)
00323         {
00324           //compression
00325           OUTPUTENL(totalSymbols);
00326         }
00327       else
00328         {
00329           //decompression
00330           OUTPUTENL(runCount-1);
00331         }
00332 
00333       OUTPUTENL("  Symbols appended to the root rule: \t" << runCount-1);
00334       OUTPUTENL("  Number of rules: \t\t\t" << ruleSet->count());
00335 
00336       if (useContextForOutput)
00337         {
00338           //compression
00339           OUTPUTENL("  Variables encoded: \t\t\t" << variablesEncoded);
00340           OUTPUTENL("  Terminals encoded: \t\t\t" << terminalsEncoded);
00341           OUTPUTENL("  Maximum length of the input queue: \t" << maxInputQueueLength);
00342           OUTPUTENL("  Average time per symbol: \t\t" << (double)totalTime/totalSymbols << " microsecs");
00343         }
00344 
00345       OUTPUTEENDLINE;
00346     }
00347 
00348 
00349   DELETE(ruleSet);
00350 
00351   DELETE(testedRules);
00352   DELETE(availableRuleNumbers);
00353 
00354   DELETE(terminalDigrams);
00355   DELETE(variableDigrams);
00356 
00357   deleteDefaultTextCodec();
00358 }
00359 
00360 
00361 
00365 void KYGrammar::purge(void)
00366 {
00367   DELETE(flushStack);
00368   DELETE(ruleStack);
00369 
00370   DELETE(ruleSet);
00371 
00372   DELETE(testedRules);
00373   DELETE(availableRuleNumbers);
00374 
00375   DELETE(terminalDigrams);
00376   DELETE(variableDigrams);
00377 
00378 
00379   NEW(flushStack, RuleElement*[KY_GRAMMAR_FLUSH_STACK_SIZE]);
00380   flushStackPos = -1;
00381 
00382   NEW(ruleStack, Rule*[KY_GRAMMAR_RULE_STACK_SIZE]);
00383   ruleStackPos = -1;
00384 
00385   NEW(ruleSet, RuleSet);
00386 
00387   NEW(testedRules, TestedRules);
00388 
00389   NEW(availableRuleNumbers, Queue<RuleId>);
00390 
00391   availableRuleNumbers->setAutoDelete(true);
00392 
00393   runCount = 1;
00394   ruleCounter = 0;
00395 
00396   rootRule = 0;
00397   createRootRule();
00398 
00399   NEW(terminalDigrams, TerminalDigrams);
00400   terminalDigrams->setAutoDelete(true);
00401  
00402   NEW(variableDigrams, VariableDigrams);
00403   variableDigrams->setAutoDelete(true);
00404 
00405   //initially, I = false (set of variables is unchanged)
00406   I = false;
00407 
00408 
00409   size = 0;
00410 }
00411 
00412 
00413 
00419 void KYGrammar::setAlphabetBaseSize(size_t size)
00420 {
00421   if (context)
00422     {
00423       if (!context->isInitialized())
00424         {
00425           //initialize the context only if it is not initialized yet
00426           context->setType(size, DynamicContext);
00427           context->initialize();
00428           
00429           //store the size of the alphabet in output device
00430           char fibBuf[10];
00431           size_t nrItems = Fibonacci::encodeToBuffer(fibBuf, SIZEOF_CHAR, size);
00432           outputDevice->writeData(fibBuf, nrItems);
00433         }
00434     }
00435 
00436   //output device is needed only for storing the alphabet size
00437   outputDevice = 0;
00438 }
00439 
00440 
00441 
00447 void KYGrammar::setOutputDevice(IODevice *device)
00448 {
00449   outputDevice = device;
00450 }
00451 
00452 
00458 Rule *KYGrammar::createRootRule(void)
00459 {
00460   if (rootRule)
00461     {
00462       WRN("Root roole already exists --> cannot create root rule.");
00463       return 0;
00464     }
00465   else
00466     {
00467       NEW(rootRule, Rule);
00468 
00469       rootRule->counter = 1;
00470       
00471       if (!rootRule)
00472         return 0;
00473       
00474       rootRule->refCount = 1;
00475       rootRule->id = ruleCounter;
00476       rootRule->body = 0;
00477       rootRule->last = 0;
00478 
00479       ruleCounter++;
00480 
00481       ruleSet->append(rootRule);
00482 
00483       return rootRule;
00484     }
00485 }
00486 
00487 
00488 
00497 Rule *KYGrammar::createRule(RuleElement *start, RuleElement *last)
00498 {
00499   Rule *rule;
00500 
00501   NEW(rule, Rule);
00502 
00503 
00504   rule->counter = 1;
00505 
00506   rule->matchRun = 0;
00507   rule->matchResult = Ignore;
00508 
00509   rule->body = start;
00510   rule->last = last;
00511 
00512   rule->refCount = 0;
00513 
00514 
00515   if (availableRuleNumbers->isEmpty())
00516     {
00517       //if no of the old ids can be reused, use ruleCounter
00518       rule->id = ruleCounter;
00519 
00520       //context->installSymbol((long)(context->lastFixedSymbol() + ruleCounter));
00521       newSymbolToInstall = context->lastFixedSymbol() + ruleCounter;
00522 
00523       DBG("Installing symbol: " << (long)(context->lastFixedSymbol() + ruleCounter) << "(" << context->lastFixedSymbol() << "," << ruleCounter << ")");
00524       ruleCounter++;
00525     }
00526   else
00527     {
00528       //otherwise use some old id
00529       RuleId *rid;
00530 
00531       rid = availableRuleNumbers->dequeue();
00532 
00533       rule->id = *rid;
00534 
00535       DELETE(rid);
00536     }
00537 
00538   ruleSet->append(rule);
00539 
00540   return rule;
00541 }
00542 
00543 
00544 
00552 Rule *KYGrammar::findRule(RuleId id)
00553 {
00554   for (Rule *r = ruleSet->first(); r; r = ruleSet->next())
00555     {
00556       if (r->id == id)
00557         return r;
00558     }
00559 
00560   return 0;
00561 }
00562 
00563 
00564 
00574 bool KYGrammar::ruleMatchesInput(TestedRule *testedRule, bool flush = false)
00575 {
00576   RuleElement *rel;
00577   unsigned int length;
00578   InputItem *i, *ii;
00579 
00580   if (testedRule->rule->matchResult == DoesntMatch && testedRule->rule->matchRun == runCount)
00581     return false;
00582 
00583   testedRule->rule->matchRun = runCount;
00584 
00585   if (!flush)
00586     {
00587       TestedElement *stack = testedRule->currentElements;
00588       TestedElement *s;
00589       //not in the "flush" mode
00590       
00591       if (!stack)
00592         {
00593           //the stack of elements is empty --> tested rule should be initialized
00594           rel = testedRule->rule->body;
00595           CHECK_POINTER(rel);
00596           length = 0;
00597           i = inputFirst;
00598         }
00599       else
00600         { 
00601           TESTEDRULES_POP(rel, stack, s);
00602           length = testedRule->matchedLength;
00603           i = testedRule->lastInputItem->next;
00604         }
00605 
00606       //for sure
00607       ii = i;
00608 
00609       while (i)
00610         {
00611           if (rel->type == Terminal)
00612             {
00613               //terminal --> simply compare value with current input item
00614           
00615               if (rel->value != i->value)
00616                 {
00617                   //the rule doesn't match...
00618                   testedRule->rule->matchResult = DoesntMatch;
00619                   testedRule->rule->matchRun = runCount;
00620 
00621                   while (stack)
00622                     {
00623                       TESTEDRULES_POP(rel, stack, s);
00624 
00625                       if (length == 1)
00626                         {
00627                           rel->rule->matchRun = runCount;
00628                           rel->rule->matchResult = DoesntMatch;
00629                         }
00630                     }
00631                   testedRule->currentElements = 0;
00632                   return false;
00633                 }
00634               else
00635                 {
00636                   //rule element matches --> increase the length and move to the next rule element
00637                   length++;
00638                   rel = rel->next;
00639               
00640                   ii = i;
00641                   i = i->next;
00642 
00643                   if (!rel)
00644                     {
00645                       //we have examined the complete rule --> check the stack if the "parent" rule element is present
00646 
00647                       while (stack && !rel)
00648                         {
00649                           //jump up if rel is the last element of the rule
00650                           TESTEDRULES_POP(rel, stack, s);
00651 
00652                           rel = rel->next;
00653                         }
00654 
00655                       if (!stack && !rel)
00656                         {
00657                           //the stack is empty --> we have examined whole rule tree
00658                           //the rule matches and matches completely
00659                           testedRule->currentElements = 0;
00660                           testedRule->matchedLength = length;
00661                           testedRule->matchesCompletely = true;
00662                           testedRule->rule->matchResult = Matches;
00663                           return true;
00664                         }
00665                     }
00666                 }
00667             }
00668           else
00669             {
00670               //variable --> push current rel to the stack and jump to the referenced rule
00671               TESTEDRULES_PUSH(rel, stack, s);
00672 
00673               rel = rel->rule->body;
00674             }
00675         }
00676 
00677       //the rule represents more than the input
00678       //push the last visited rule element
00679       TESTEDRULES_PUSH(rel, stack, s);
00680       testedRule->currentElements = stack;
00681       testedRule->matchesCompletely = false;
00682       testedRule->matchedLength = length;
00683       testedRule->lastInputItem = ii;
00684       testedRule->rule->matchResult = Matches;
00685   
00686       return true;
00687     }
00688   else
00689     {
00690       flushStackPos = -1;
00691       //the "flush" mode
00692       rel = testedRule->rule->body;
00693       length = 0;
00694       i = inputFirst;
00695 
00696       //for sure
00697       ii = i;
00698 
00699       while (i)
00700         {
00701           if (rel->type == Terminal)
00702             {
00703               //terminal --> simply compare value with current input item
00704           
00705               if (rel->value != i->value)
00706                 {
00707                   testedRule->rule->matchRun = runCount;
00708                   testedRule->rule->matchResult = DoesntMatch;
00709 
00710                   while (flushStackPos != -1)
00711                     {
00712                       rel = flushStack[flushStackPos];
00713                       flushStackPos--;
00714                         
00715                       if (length == 1)
00716                         {
00717                           rel->rule->matchRun = runCount;
00718                           rel->rule->matchResult = DoesntMatch;
00719                         }
00720                     }
00721                   return false;
00722                 }
00723               else
00724                 {
00725                   //rule element matches --> increase the length and move to the next rule element
00726                   length++;
00727                   rel = rel->next;
00728               
00729                   ii = i;
00730                   i = i->next;
00731 
00732                   if (!rel)
00733                     {
00734                       //we have examined the complete rule --> check the stack if the "parent" rule element is present
00735 
00736                       while (flushStackPos != -1 && !rel)
00737                         {
00738                           //jump up if rel is the last element of the rule
00739                           rel = flushStack[flushStackPos]->next;
00740                           flushStackPos--;
00741                         }
00742 
00743                       if (flushStackPos == -1 && !rel)
00744                         {
00745                           //the stack is empty --> we have examined whole rule tree
00746                           //the rule matches and matches completely
00747                           testedRule->matchedLength = length;
00748                           testedRule->matchesCompletely = true;
00749                           testedRule->rule->matchRun = runCount;
00750                           testedRule->rule->matchResult = Matches;
00751                           return true;
00752                         }
00753                     }
00754                 }
00755             }
00756           else
00757             {
00758               //variable --> push current rel to the stack and jump to the referenced rule
00759               flushStackPos++;
00760               if (flushStackPos == KY_GRAMMAR_FLUSH_STACK_SIZE)
00761                 {
00762                   FATAL("Size of KY flush stack exceeded! Please redefine the KY_GRAMMAR_FLUSH_STACK_SIZE constant in kygrammar.h");
00763                 }
00764               flushStack[flushStackPos] = rel;
00765               rel = rel->rule->body;
00766             }
00767         }
00768 
00769       testedRule->rule->matchRun = runCount;
00770       testedRule->rule->matchResult = DoesntMatch;
00771   
00772       return false;
00773     }
00774 }
00775 
00776 
00777 
00778 void KYGrammar::eatData(bool forced)
00779 {
00780   TestedRule testedRule;
00781 
00782   if (forced)
00783     {
00784       //time for preparation of rule testing
00785       bestTestedRule.rule = 0;
00786       bestTestedRule.length = 1;
00787 
00788       //filter rules in ruleSet against the input
00789       for (Rule *r = ruleSet->first(); r; r = ruleSet->next())
00790         {
00791           //ignore the root rule
00792           if (r == rootRule)
00793             continue;
00794 
00795           if (r->matchRun == runCount && r->matchResult == DoesntMatch)
00796             continue;
00797 
00798           if (r->body->type == Terminal && r->body->value != inputFirst->value)
00799             {
00800               r->matchResult = DoesntMatch;
00801               r->matchRun = runCount;
00802               continue;
00803             }
00804 
00805           //DBG("! Testing rule " << r->id);
00806           testedRule.rule = r;
00807           testedRule.currentElements = 0;
00808           if (ruleMatchesInput(&testedRule, true))
00809             {
00810               //DBG("! Rule " << r->id << " matches input");
00811               if (testedRule.matchesCompletely &&
00812                   testedRule.matchedLength > bestTestedRule.length)
00813                 {
00814                   //DBG("! Rule " << r->id << " is current best ! (" << testedRule.matchedLength << ")");
00815                   bestTestedRule.rule = r;
00816                   bestTestedRule.length = testedRule.matchedLength;     //because position in queue starts with 0
00817                   //no need to add rule to testedRules; it is already stored in bestTestedRule
00818                 }
00819             }
00820         }
00821       
00822       RuleElement *rel;
00823       NEW(rel, RuleElement);
00824 
00825       if (bestTestedRule.rule)
00826         //use matched rule
00827         {
00828           rel->type = Variable;
00829           rel->value = 0;
00830           rel->rule = 0;        //for safe
00831           rel->rule = bestTestedRule.rule;
00832           //increase rule reference count
00833           rel->rule->refCount++;
00834           
00835           //delete first N input symbols represented by the rule
00836           InputItem *ii;
00837           for (size_t i = 0 ; i < bestTestedRule.length; i++)
00838             {
00839               ii = inputFirst->next;
00840               DELETE(inputFirst);
00841               inputFirst = ii;
00842             }
00843           DECREASE_INPUT_QUEUE_LENGTH(bestTestedRule.length);
00844           
00845           if (!inputFirst)
00846             //complete input queue deleted
00847             inputLast = 0;
00848         }
00849       else
00850         //use head of input queue instead
00851         {
00852           rel->type = Terminal;
00853           rel->value = 0;
00854           rel->rule = 0;        //for safe
00855           rel->value = inputFirst->value;
00856           
00857           InputItem *ii = inputFirst->next;
00858           DELETE(inputFirst);
00859           inputFirst = ii;
00860           DECREASE_INPUT_QUEUE_LENGTH(1);
00861           if (!inputFirst)
00862             //complete input queue deleted
00863             inputLast = 0;
00864         }
00865       //append new symbol to root rule
00866       appendToRootRule(rel);
00867     }
00868   else
00869     {
00870       bool canEnd = false;
00871       NeighboursDescription *nd;
00872       RuleElementNeighbour *ren;
00873       Rule *r, *rr;
00874 
00875       while (!canEnd)
00876         {
00877           if (!inputFirst)
00878             return;
00879           
00880           if (testedRules->isEmpty())
00881             {
00882               //no rule candidate from previous runs
00883               //let's start new tests
00884               //time for preparation of rule testing
00885               bestTestedRule.rule = 0;
00886               bestTestedRule.length = 1;
00887               
00888               
00889               
00890               //find all of the elements following current input value
00891               nd = terminalDigrams->find(inputFirst->value);
00892 
00893               if (nd)
00894                 {
00895 
00896                   ruleStackPos = -1;
00897 
00898                   for (ren = nd->neighbours.first(); ren; ren = nd->neighbours.next())
00899                     {
00900                       //testing direct match
00901 
00902                       //we are interested only in the elements that are on the second position in the rule
00903                       //i.e. the current input value is the first element in the rule
00904                       if (ren->origin == ren->rule->body->next)
00905                         {
00906                           if (ren->rule->matchRun != runCount)
00907                             {
00908                               ruleStackPos++;
00909                               if (ruleStackPos == KY_GRAMMAR_RULE_STACK_SIZE)
00910                                 {
00911                                   FATAL("Size of the rule stack exceeded!");
00912                                 }
00913                               ruleStack[ruleStackPos] = ren->rule;
00914                               for (rr = nd->representedBy.first(); rr; rr = nd->representedBy.next())
00915                                 {
00916                                   if (rr->matchRun != runCount)
00917                                     {
00918                                       //rule candidate that hasn't been tested in this run
00919                                       ruleStackPos++;
00920                                       if (ruleStackPos == KY_GRAMMAR_RULE_STACK_SIZE)
00921                                         {
00922                                           FATAL("Size of the rule stack exceeded!");
00923                                         }
00924 
00925                                       ruleStack[ruleStackPos] = rr;
00926                                       //  DBG("EATDATA (mode 0) pushed rule: " << rr->id);
00927                                     }
00928                                 }
00929                             }
00930                         }
00931                       else
00932                         continue;
00933 
00934                       while (ruleStackPos > -1)
00935                         {
00936                           r = ruleStack[ruleStackPos];
00937                           ruleStackPos--;
00938                       
00939                           //ignore the root rule
00940                           if (r == rootRule)
00941                             continue;
00942                           
00943                           //ignore rules already processed in the current run
00944                           if (r->matchRun == runCount)
00945                             {
00946                               //                              DBG("Omitting rule: " << r->id);
00947                               continue;
00948                             }
00949                           
00950                           //DBG("! Testing rule " << r->id);
00951                           testedRule.rule = r;
00952                           testedRule.currentElements = 0;
00953                           if (ruleMatchesInput(&testedRule))
00954                             {
00955                               //DBG("! Rule " << r->id << " matches input");
00956                               if (testedRule.matchesCompletely)
00957                                 {
00958                                   //DBG("! Complete match !");
00959                                   
00960                                   if (testedRule.matchedLength > bestTestedRule.length)
00961                                     {
00962                                       //DBG("! Rule " << r->id << " is current best ! (" << testedRule.matchedLength << ")");
00963                                       bestTestedRule.rule = r;
00964                                       bestTestedRule.length = testedRule.matchedLength; //because position in queue starts with 0
00965                                       //no need to add rule to testedRules; it is already stored in bestTestedRule
00966                                     }
00967                                 }
00968                               else
00969                                 {
00970                                   //rule represents more than input, so store it for future
00971                                   //DBG("! Rule represents more than input");
00972                                   //if not force, we are interested also in the rules that represent more than current input
00973                                   TestedRule *tr;
00974                                   
00975                                   NEW(tr, TestedRule);
00976                                   tr->rule = testedRule.rule;
00977                                   tr->matchedLength = testedRule.matchedLength;
00978                                   tr->matchesCompletely = testedRule.matchesCompletely;
00979                                   tr->lastInputItem = testedRule.lastInputItem;
00980                                   tr->currentElements = testedRule.currentElements;
00981                                   
00982                                   testedRules->append(tr);
00983                                 }
00984                             }
00985                         }
00986                     }
00987                 }
00988             }
00989           else
00990             {
00991               TestedRule *tr = testedRules->first();
00992               while (tr)
00993                 {
00994                   //DBG("!! Testing rule " << tr->rule->id);
00995                   if (ruleMatchesInput(tr))
00996                     {
00997                       //DBG("!! Rule " << tr->rule->id << " matches input, length: " << tr->matchedLength);
00998                       if (tr->matchesCompletely)
00999                         {
01000                           //DBG("!! Complete match (" << tr->matchedLength << ")!");
01001 
01002                           if (tr->matchedLength > bestTestedRule.length && tr->matchedLength > 1)
01003                             {
01004                               //DBG("!! Rule " << tr->rule->id << " is current best ! (" << tr->matchedLength << ")");
01005                               bestTestedRule.rule = tr->rule;
01006                               bestTestedRule.length = tr->matchedLength;        //because position in queue starts with 0
01007                               testedRules->remove();
01008                               tr = testedRules->current();
01009                               continue;
01010                             }
01011                           else
01012                             {
01013                               //DBG("!! Rule " << tr->rule->id << " matches input, but not best --> removing");
01014                               testedRules->remove();
01015                               tr = testedRules->current();
01016                               continue;
01017                             }
01018                         }
01019                       else
01020                         {
01021                           canEnd = true;
01022                           //rule represents more than input, so store it for future
01023                           //DBG("!! Rule represents more than input");
01024 
01025                         }
01026                     }
01027                   else
01028                     {
01029                       //DBG("!! Rule " << tr->rule->id << " doesn't match input --> removing from testedRules");
01030                       testedRules->remove();
01031                       tr = testedRules->current();
01032                       continue;
01033                     }
01034 
01035                   tr = testedRules->next();
01036 
01037                 }
01038 
01039             }
01040 
01041           //if no rules remain in testedRules, check bestMatchedRule
01042           //if any rule matched, use it, otherwise add first item in the input queue to the right side of the root rule
01043           if (testedRules->isEmpty())
01044             {
01045               RuleElement *rel;
01046               NEW(rel, RuleElement);
01047 
01048               if (bestTestedRule.rule)
01049                 //use matched rule
01050                 {
01051                   rel->type = Variable;
01052                   rel->value = 0;
01053                   rel->rule = 0;        //for safe
01054                   rel->rule = bestTestedRule.rule;
01055                   //increase rule reference count
01056                   rel->rule->refCount++;
01057 
01058                   //delete first N input symbols represented by the rule
01059                   InputItem *ii;
01060                   for (size_t i = 0 ; i < bestTestedRule.length; i++)
01061                     {
01062                       ii = inputFirst->next;
01063                       DELETE(inputFirst);
01064                       inputFirst = ii;
01065                     }
01066                   DECREASE_INPUT_QUEUE_LENGTH(bestTestedRule.length);
01067 
01068 
01069                   if (!inputFirst)
01070                     //complete input queue deleted
01071                     inputLast = 0;
01072 
01073                 }
01074               else
01075                 //use head of input queue, because no rule matches
01076                 {
01077                   rel->type = Terminal;
01078                   rel->value = 0;
01079                   rel->rule = 0;        //for safe
01080                   rel->value = inputFirst->value;
01081 
01082                   InputItem *ii = inputFirst->next;
01083                   DELETE(inputFirst);
01084                   inputFirst = ii;
01085                   DECREASE_INPUT_QUEUE_LENGTH(1);
01086 
01087                   if (!inputFirst)
01088                     //complete input queue deleted
01089                     inputLast = 0;
01090                 }
01091 
01092               //append new symbol to root rule
01093               appendToRootRule(rel);
01094             }
01095           else
01096             //we have some rules that represnt more than the input
01097             canEnd = true;
01098         }
01099     }
01100 }
01101 
01102 
01104 
01105 
01106 
01107 
01115 void KYGrammar::append(TerminalValue value)
01116 {
01117   InputItem *i;
01118 
01119   NEW(i, InputItem);
01120 
01121   i->next = 0;
01122   i->value = value;
01123 
01124   if (!inputFirst)
01125     inputFirst = i;
01126   else
01127     inputLast->next = i;
01128 
01129   inputLast = i;
01130 
01131   INCREASE_INPUT_QUEUE_LENGTH(1);
01132   eatDataPeriodicity--;
01133 
01134   totalSymbols++;
01135   
01136 
01137   if (!eatDataPeriodicity)
01138     {
01139       struct timeval t1, t2, t3;
01140       timerclear(&t1);
01141       timerclear(&t2);
01142       gettimeofday(&t1, 0);
01143       
01144       eatData();
01145       
01146       gettimeofday(&t2, 0);
01147       timersub(&t2, &t1, &t3);
01148       totalTime+=t3.tv_usec;
01149       eatDataPeriodicity = KY_GRAMMAR_EATDATA_PERIODICITY;
01150     }
01151 }
01152 
01153 
01154 
01160 void KYGrammar::flush(void)
01161 {
01162   DBG("Flushing the grammar...");
01163   while (inputFirst)
01164     {
01165       eatData(true);
01166     }
01167 }
01168 
01169 
01170 
01180 void KYGrammar::appendToRootRule(RuleElement *newElement)
01181 {
01182   RuleElement *last;
01183   NeighboursDescription *nd;
01184   RuleElementNeighbour *r;
01185   Rule *newRule;
01186   bool collision;
01187 
01188 
01189   if (!textCodec)
01190     //use default TextCodec if none has been specified
01191     createDefaultTextCodec();
01192       
01193   if (context)
01194     {
01195       if (!context->isInitialized())
01196         {
01197           //context is not initialized yet
01198           //some character data occurred before the XML declaration
01199           //initialize the context with some default base size (for example UTF_8) ...
01200           setAlphabetBaseSize(textCodec->suggestAlphabetBaseSize(Encodings::UTF_8));
01201         }
01202     }
01203 
01204   //reconstruct appended terminal/variable to the output device
01205   //only for decoding
01206   if (outputDevice)
01207     {
01208       if (newElement->type == Terminal)
01209         {
01210           outputDevice->writeData((const char *)&newElement->value, SIZEOF_XML_CHAR);
01211         }
01212       else
01213         reconstructRule(newElement->rule, outputDevice);
01214     }
01215 
01216 
01217   runCount++;
01218 
01219   //increase the size of the grammar by 1
01220   size++;
01221 
01222   RuleElement appendedElement;
01223   appendedElement = *newElement;
01224 
01225 
01226   last = rootRule->last;
01227   RIGHT_SIDE_APPEND(rootRule, newElement);
01228 
01229   if (last)
01230     //inserted symbol isn't the first symbol of the rule
01231     {
01232       if (last->type == Terminal)
01233         nd = terminalDigrams->find(last->value);
01234       else
01235         nd = variableDigrams->find(last->rule->id);
01236 
01237       if (nd)
01238         //symbol is in digrams
01239         {
01240           //              DBG("Adding to digrams");
01241 
01242           collision = false;
01243 
01244           //check whether there is a record about inserted symbol
01245           for (RuleElementNeighbour *n = nd->neighbours.first(); n; n = nd->neighbours.next())
01246             {
01247               if ((n->origin->type == Terminal && newElement->type == Terminal && n->origin->value == newElement->value)
01248                   || (n->origin->type == Variable && newElement->type == Variable && n->origin->rule == newElement->rule))
01249                 //this should be a safe test....
01250                 {
01251                   if (n->origin == last)
01252                     {
01253                       //                      DBG("Triple --> ignoring");
01254                       collision = true;
01255 
01256                       //set of variables remains unchanged
01257                       I = false;
01258                       break;
01259                     }
01260                   else
01261                     {
01262                       //                      DBG("ACCEPTED DIGRAM: " << n << " - rule: " << n->rule << " origin: " << n->origin << " ");
01263                       //                      DBG("origintype: " << n->origin->type << " originrule: " << n->origin->rule);
01264                       if (n->rule == rootRule)
01265                         //found the same pair in root rule
01266                         {
01267                           //                              DBG("Collision (2x pair in root rule) --> RR2");
01268 
01269                           if (I == false)
01270                             //I equals 0 --> apply RR2 and set I to 1
01271                             //(one variable has been created)
01272                             {
01273                               reductionRule2(n->origin->prev, last);
01274                               I = true;
01275                             }
01276                           else
01277                             //I equals 1 --> apply RR2, then RR1 and set I to 0
01278                             //(one variable has been created and one has been deleted)
01279                             {
01280                               newRule = reductionRule2(n->origin->prev, last);
01281                               reductionRule1(newRule);
01282                             }
01283                         }
01284                       else
01285                         //found the same pair in distinct rules
01286                         {
01287                           //                      DBG(n->rule->body << " : " << n->origin->prev->rule << " ; " << (char)n->origin->value);
01288                           //                      DBG(last->rule << " ; " << (char)last->next->value);
01289 
01290                           if (n->origin->prev->prev || n->origin->next)
01291                             {
01292                               //apply RR3 only if the original rule is longer than 2
01293                               //                              DBG("Collision (2x pair in distinct rules) --> RR3");
01294 
01295                               if (I == false)
01296                                 //I equals 0 --> apply RR3 and set I to 1
01297                                 //(one variable has been created)
01298                                 {
01299                                   reductionRule3(n->rule, n->origin->prev, last);
01300                                   I = true;
01301                                 }
01302                               else
01303                                 //I equals 1 --> apply RR3, then RR1 and set I to 0
01304                                 //(one variable has been created and one has been deleted)
01305                                 {
01306                                   newRule = reductionRule3(n->rule, n->origin->prev, last);
01307                                   reductionRule1(newRule);
01308                                 }
01309                             }
01310                           else
01311                             I = false;
01312                         }
01313 
01314                       //indicate that a digrams collision occurred and end the cycle
01315                       collision = true;
01316                       break;
01317                     }
01318                 }
01319             }
01320 
01321           if (!collision)
01322             //if no collision occurred, insert new (root) digram for last element of the root rule
01323             {
01324               //              DBG("no collision");
01325               NEW(r, RuleElementNeighbour);
01326               r->origin = newElement;
01327 
01328               r->rule = rootRule;
01329               nd->neighbours.append(r);
01330 
01331               //set of variables remains unchanged
01332               I = false;
01333 
01334             }
01335         }
01336       else
01337         //symbol is not in digrams
01338         {
01339           //      DBG("Inserting into digrams");
01340           DIGRAM_ADD(last, newElement, rootRule);
01341 
01342           //set of variables remains unchanged
01343           I = false;
01344         }
01345     }
01346 
01347 
01348   //now comes time for encoding the newly created grammar
01349 #ifdef SEQUENTIAL_ALGORITHM
01350   if (useContextForOutput)
01351     {
01352       if (context)
01353         {
01354           if (appendedElement.type == Terminal)
01355             {
01356               //DBG(":" << (unsigned int)appendedElement.value);
01357               //encoding of a terminal
01358 
01359               if (context->encode((int)(unsigned char)appendedElement.value) == ContextBase::NotKnown)
01360                 {
01361                   FATAL("encoding unknown terminal: " << (unsigned char)appendedElement.value);
01362                 }
01363 
01364               terminalsEncoded++;
01365             }
01366           else
01367             {
01368               //encoding of a variable
01369               long symbolId = context->lastFixedSymbol() + appendedElement.rule->id;
01370 
01371               //DBG("VARENC: " << symbolId <<endl;//- lastFixedContextSymbol);
01372 
01373               if (context->encode(symbolId) == ContextBase::NotKnown)
01374                 {
01375                   FATAL("Encoding unknown symbol: " << symbolId);
01376                 }
01377 
01378               variablesEncoded++;
01379             }
01380         }
01381     }
01382 
01383   //if new symbol has to be installed to the encoding context
01384   //install it now
01385   //it is IMPORTANT to install the symbol after the encoding operation
01386   //otherwise the decoder will be unable to decode "modified" arithmetic code!
01387   if (newSymbolToInstall)
01388     {
01389       context->installSymbol(newSymbolToInstall);
01390       newSymbolToInstall = 0;
01391     }
01392 
01393 
01394   if (size == KY_GRAMMAR_MAX_SIZE)
01395     {
01396       DBG("PURGING THE GRAMMAR");
01397       purge();
01398     }
01399 #endif
01400 
01401 }
01402 
01403 
01404 
01410 void KYGrammar::reductionRule1(Rule *rule)
01411 {
01412   RuleElement *rel;
01413   RuleElement *origRuleEl;
01414   Rule *origRule;
01415   NeighboursDescription *nd;
01416   RuleElementNeighbour *r;
01417 
01418   CHECK_POINTER(rule);
01419 
01420 
01421 
01422   //DBG("Applying reduction rule 1.");
01423 
01424   //DBG("Grammar before applying RR1:");
01425 
01426   //print();
01427   //printDigrams();
01428 
01429   //get the first element of rule
01430   rel = rule->body;
01431 
01432   if (rel->type == Terminal)
01433     {
01434       FATAL("First element is not a variable!");
01435     }
01436 
01437   //get the rule referenced by rel
01438   origRule = rel->rule;
01439 
01440   RuleId *rid;
01441 
01442   NEW(rid, RuleId(rel->rule->id));
01443 
01444   availableRuleNumbers->enqueue(rid);
01445 
01446   //change all digrams of origRule to rule
01447   // :( some optimalisations would be fine...
01448   origRuleEl = origRule->body;
01449   while (origRuleEl->next)
01450     {
01451       DIGRAM_CHANGE(origRuleEl, origRuleEl->next, rule);
01452       origRuleEl = origRuleEl->next;
01453     }
01454 
01455 
01456   //replace rel with origRule contents in rule
01457   origRuleEl->next = rel->next;
01458 
01459   if (rel->next)
01460     {
01461       rel->next->prev = origRuleEl;
01462 
01463       //add digram last element of the origRule to second element of the rule
01464       DIGRAM_ADD(origRuleEl, rel->next, rule);
01465 
01466       //remove digram about rel and rel->next
01467       DIGRAM_REMOVE(rel, rel->next);
01468     }
01469 
01470   rule->body = origRule->body;
01471 
01472 
01473 
01474   //remove the rule from the representedBy list of the first element
01475   //of the rule referenced by that variable
01476 
01477   NeighboursDescription *ndRep;
01478   RuleElement *relRep = rel->rule->body;
01479 
01480   if (relRep->type == Terminal)
01481     ndRep = terminalDigrams->find(relRep->value);
01482   else
01483     ndRep = variableDigrams->find(relRep->rule->id);
01484   
01485   CHECK_POINTER(ndRep);
01486   if (ndRep)
01487     {
01488       //        DBG("RR1 remove (a) from " << (unsigned int)rel->rule->body->value << ":: " << rule->id);
01489       if (!ndRep->representedBy.remove(rule))
01490         {
01491           //      ERR("unable to remove!!!");
01492         }
01493     }
01494   
01495   
01496   
01497   if (origRule->body->type == Variable)
01498     {
01499       relRep = origRule->body;
01500 
01501       while (relRep->type != Terminal)
01502         relRep = relRep->rule->body;
01503       
01504       ndRep = terminalDigrams->find(relRep->value);
01505       
01506       CHECK_POINTER(ndRep);
01507       if (ndRep)
01508         {
01509           //      DBG("RR1 remove from " << (unsigned int)origRule->body->value << ":: " << origRule->id);
01510           while (ndRep->representedBy.remove(origRule));
01511           
01512           //      DBG("RR1 append to " << (unsigned int)relRep->value << ":: " << rule->id);
01513           REPRESENTED_BY_APPEND(ndRep->representedBy, rule);
01514           //      ndRep->representedBy.append(rule);
01515         }
01516     }
01517   
01518 
01519   DELETE(rel);
01520 
01521 
01522   ruleSet->remove(origRule);
01523   DELETE(origRule);
01524 
01525 
01526   //the size of the grammar has to be decreased by one
01527   size--;
01528 
01529 }
01530 
01531 
01532 
01541 Rule *KYGrammar::reductionRule2(RuleElement *elOrig1, RuleElement *elNew1)
01542   //parametr je na puvodni prvni prvek a na novy prvni prvek (oba v rootRule)
01543 {
01544   RuleElement *elOrig2;
01545   RuleElement *elNew2;
01546   RuleElement *var1, *var2;
01547   Rule *newRule;
01548   NeighboursDescription *nd;
01549   RuleElementNeighbour *r;
01550 
01551   //              DBG("Applying reduction rule 2.");
01552 
01553   CHECK_POINTER(elOrig1);
01554   CHECK_POINTER(elNew1);
01555 
01556   elOrig2 = elOrig1->next;
01557   CHECK_POINTER(elOrig2);
01558 
01559   //if any of elOrig1 or elOrig2 were variables,
01560   //decrease the reference count of corresponding rule(s)
01561   if (elOrig1->type == Variable)
01562     elOrig1->rule->refCount--;
01563 
01564   if (elOrig2->type == Variable)
01565     elOrig2->rule->refCount--;
01566 
01567   //create non-terminal substituting first pair (elOrig1, elOrig2)
01568   NEW(var1, RuleElement);
01569   var1->type = Variable;
01570   var1->value = 0;
01571   var1->rule = 0;       //for safe
01572   var1->next = elOrig2->next;
01573 
01574   //omitted test on 0, because 0 shouldn't never happen!
01575   elOrig2->next->prev = var1;
01576   var1->prev = elOrig1->prev;
01577 
01578   //remove digram with successor of elOrig2
01579   DIGRAM_REMOVE(elOrig2, var1->next);
01580 
01581 
01582 
01583   elNew2 = elNew1->next;
01584   CHECK_POINTER(elNew2);
01585 
01586   //create non-terminal substituting second pair (elNew1, elNew2)
01587   NEW(var2, RuleElement);
01588   var2->type = Variable;
01589   var2->value = 0;
01590   var2->rule = 0;       //for safe
01591   var2->next = 0;
01592   var2->prev = elNew1->prev;
01593   elNew1->prev->next = var2;
01594 
01595   //last symbol of the rule is now var2
01596   rootRule->last = var2;
01597 
01598   if (!elOrig1->prev)
01599     //elOrig1 was the first element
01600     //var1 is now the head of the list
01601     rootRule->body = var1;
01602   else
01603     elOrig1->prev->next = var1;
01604 
01605   //create rule from original pair
01606   elOrig1->prev = 0;
01607   elOrig2->next = 0;
01608 
01609     
01610 
01611 
01612   //create new rule with right side containing elOrig1 and elOrig2
01613   newRule = createRule(elOrig1, elOrig2);
01614 
01615   //digram (elOrig1, elOrig2) is now in newRule
01616   DIGRAM_CHANGE(elOrig1, elOrig2, newRule);
01617 
01618   //bind created variables with new rule
01619   var1->rule = newRule;
01620   var2->rule = newRule;
01621 
01622   //appropriately adjust reference count fo the rule
01623   newRule->refCount = 2;
01624 
01625 
01626   //update digrams
01627   DIGRAM_REMOVE(elNew1->prev, elNew1);
01628 
01629   if (var1->prev)
01630     DIGRAM_REMOVE(var1->prev, elOrig1);
01631 
01632   DIGRAM_ADD(var1, var1->next, rootRule);
01633   DIGRAM_ADD(var2->prev, var2, rootRule);
01634 
01635 
01636   //delete second pair of terminals
01637   DELETE(elNew1);
01638   DELETE(elNew2);
01639 
01640 
01641   RuleElement *relRep;
01642   NeighboursDescription *ndRep;
01643 
01644   //if the first symbol of the new rule is a variable
01645   //append the rule to the representedBy list of the first element
01646   //of the rule referenced by that variable
01647   if (elOrig1->type == Variable)
01648     {
01649       relRep = elOrig1;
01650       //DBG (">>> " << elOrig1->rule->id << " : body = " << elOrig1->rule->body->value);
01651 
01652       while (relRep->type != Terminal)
01653         relRep = relRep->rule->body;
01654 
01655       ndRep = terminalDigrams->find(relRep->value);
01656       
01657       CHECK_POINTER(ndRep);
01658       if (ndRep)
01659         {
01660           //      DBG("RR2 append to "  << (unsigned int)relRep->value << ":: " << newRule->id);
01661           REPRESENTED_BY_APPEND(ndRep->representedBy, newRule);
01662           //ndRep->representedBy.append(newRule);
01663         }
01664 
01665     }
01666   
01667 
01668 
01669   //the size doesn't change: size-2*2 + 2*1 + 1*2 (replace two digrams with two variables and create new rule with size 2)
01670   //return pointer to the new rule
01671   return newRule;
01672 }
01673 
01674 
01675 
01685 Rule *KYGrammar::reductionRule3(Rule *ruleOrig, RuleElement *elOrig1, RuleElement *elNew1)
01686 {
01687   RuleElement *elOrig2;
01688   RuleElement *elNew2;
01689   RuleElement *var1, *var2;
01690   Rule *newRule;
01691   NeighboursDescription *nd;
01692   RuleElementNeighbour *r;
01693 
01694   //              DBG("Applying reduction rule 3.");
01695 
01696   CHECK_POINTER(ruleOrig);
01697   CHECK_POINTER(elOrig1);
01698   CHECK_POINTER(elNew1);
01699 
01700   elOrig2 = elOrig1->next;
01701   CHECK_POINTER(elOrig2);
01702 
01703   //if any of elOrig1 or elOrig2 were variables,
01704   //decrease the reference count of responding rule(s)
01705   if (elOrig1->type == Variable)
01706     elOrig1->rule->refCount--;
01707 
01708   if (elOrig2->type == Variable)
01709     elOrig2->rule->refCount--;
01710 
01711   //create non-terminal substituting first pair (elOrig1, elOrig2)
01712   NEW(var1, RuleElement);
01713   var1->type = Variable;
01714   var1->value = 0;
01715   var1->rule = 0;       //for safe
01716 
01717   var1->next = elOrig2->next;
01718   if (elOrig2->next)
01719     {
01720       //remove digram with successor of elOrig2
01721       //       DBG("ELORIG2, remove 1: " << elOrig2);
01722       DIGRAM_REMOVE(elOrig2, var1->next);
01723       elOrig2->next->prev = var1;
01724     }
01725   else
01726     //var1 is now the last element of ruleOrig
01727     ruleOrig->last = var1;
01728 
01729 
01730   var1->prev = elOrig1->prev;
01731   if (!elOrig1->prev)
01732     //elOrig1 was the first element
01733     //var1 is now the first element of the ruleOrig
01734     ruleOrig->body = var1;
01735   else
01736     elOrig1->prev->next = var1;
01737 
01738 
01739   elNew2 = elNew1->next;
01740   CHECK_POINTER(elNew2);
01741 
01742   //create non-terminal substituting second pair (elNew1, elNew2)
01743   NEW(var2, RuleElement);
01744   var2->type = Variable;
01745   var2->value = 0;
01746   var2->rule = 0;       //for safe
01747   var2->next = 0;
01748   var2->prev = elNew1->prev;
01749 
01750   if (elNew1->prev)
01751     elNew1->prev->next = var2;
01752   else
01753     //var2 is now the only element of roorRule
01754     rootRule->body = var2;
01755 
01756   //last symbol of the rule is now var2
01757   rootRule->last = var2;
01758 
01759 
01760   //create rule from original pair
01761   elOrig1->prev = 0;
01762   elOrig2->next = 0;
01763 
01764 
01765   //create new rule with right side containing elOrig1 and elOrig2
01766   newRule = createRule(elOrig1, elOrig2);
01767 
01768   //digram (elOrig1, elOrig2) is now owned by newRule
01769   DIGRAM_CHANGE(elOrig1, elOrig2, newRule);
01770 
01771   //bind created variables with new rule
01772   var1->rule = newRule;
01773   var2->rule = newRule;
01774 
01775   //appropriately adjust reference count fo the rule
01776   newRule->refCount = 2;
01777 
01778 
01779   //update digrams
01780   //    DBG("ELNEW1->PREV, remove 2: " << elNew1->prev);
01781   DIGRAM_REMOVE(elNew1->prev, elNew1);
01782   if (var1->prev)
01783     {
01784       //        DBG("VAR1->prev, remove 3" << var1->prev);
01785       DIGRAM_REMOVE(var1->prev, elOrig1);
01786     }
01787 
01788   if (var1->next)
01789     DIGRAM_ADD(var1, var1->next, ruleOrig);
01790 
01791   if (var1->prev)
01792     DIGRAM_ADD(var1->prev, var1, ruleOrig);
01793 
01794   DIGRAM_ADD(var2->prev, var2, rootRule);
01795 
01796 
01797   //delete second pair of terminals
01798   DELETE(elNew1);
01799   DELETE(elNew2);
01800 
01801 
01802   NeighboursDescription *ndRep;
01803   RuleElement *relRep;
01804   
01805 
01806 
01807   if (!var1->prev)
01808     {
01809       relRep = var1;
01810 
01811       while (relRep->type != Terminal)
01812         relRep = relRep->rule->body;
01813 
01814       ndRep = terminalDigrams->find(relRep->value);
01815 
01816       if (ndRep)
01817         {
01818           REPRESENTED_BY_APPEND(ndRep->representedBy, ruleOrig);
01819         }
01820     }
01821   //if the first symbol of the new rule is a variable
01822   //append the rule to the representedBy list of the first element
01823   //of the rule referenced by that variable
01824   if (elOrig1->type == Variable)
01825     {
01826       if (var1->prev)
01827         {
01828           relRep = elOrig1;
01829           
01830           while (relRep->type != Terminal)
01831             relRep = relRep->rule->body;
01832 
01833           ndRep = terminalDigrams->find(relRep->value);
01834 
01835         }
01836       
01837       CHECK_POINTER(ndRep);
01838       if (ndRep)
01839         {
01840           //      DBG(newRule->id);
01841           //      DBG("RR3 append (a) to "  << (unsigned int)relRep->value << ":: " << newRule->id);
01842           REPRESENTED_BY_APPEND(ndRep->representedBy, newRule);
01843         }
01844     }
01845 
01846 
01847   //the size doesn't change: size-2*2 + 2*1 + 1*2 (replace two digrams with two variables and create new rule with size 2)
01848   
01849   //return pointer to the new rule
01850   return newRule;
01851 }
01852 
01853 
01854 
01860 void KYGrammar::reconstructInput(void)
01861 {
01862   //    DBG("Reconstructed data follow on the next line:");
01863   reconstructRule(rootRule, outputDevice);
01864   //    DBG("Reconstructed data end.");
01865 }
01866 
01867 
01868 
01877 void KYGrammar::reconstructRule(Rule *rule, IODevice *outputDevice)
01878 {
01879   RuleElement *rel;
01880 
01881   rel = rule->body;
01882 
01883   while (rel)
01884     {
01885       if (rel->type == Terminal)
01886         {
01887           if (!outputDevice)
01888             OUTPUT((char)rel->value);
01889           else
01890             {
01891               outputDevice->writeData((const char *)&rel->value, SIZEOF_XML_CHAR);
01892             }
01893         }
01894       else
01895         reconstructRule(rel->rule, outputDevice);
01896 
01897       rel = rel->next;
01898     }
01899 }
01900 
01901 
01902 
01903 size_t KYGrammar::getRuleLength(Rule *rule)
01904 {
01905   RuleElement *rel;
01906   size_t size = 0;
01907 
01908   rel = rule->body;
01909 
01910   while (rel)
01911     {
01912       if (rel->type == Terminal)
01913         size++;
01914       else
01915         size = size + getRuleLength(rel->rule);
01916 
01917       rel = rel->next;
01918     }
01919 
01920   return size;
01921 }
01922 
01923 
01924 
01928 void KYGrammar::print(void)
01929 {
01930   Rule *rule;
01931   RuleElement *rel;
01932 
01933   OUTPUTENL("KY grammar content");
01934   //OUTPUTENL("Current run: " << (int)runCount);
01935   //OUTPUTENL("  Number of rules: \t" << ruleSet->count());
01936 
01937   for (rule = ruleSet->first(); rule; rule = ruleSet->next())
01938     {
01939       OUTPUTE("  R" << std::dec << rule->id << " (length: " << getRuleLength(rule) << ", used: " << rule->refCount << "x) ---> \'");
01940 
01941       if (RIGHT_SIDE_IS_EMPTY(rule))
01942         {
01943           OUTPUTE("empty");
01944         }
01945       else
01946         {
01947           for (rel = rule->body; rel; rel = rel->next)
01948             {
01949               if (rel->type == Terminal)
01950                 {
01951                   if ((int)rel->value > 31)
01952                     {
01953                       //output the character
01954                       OUTPUTE((char)rel->value << " ");
01955                     }
01956                   else
01957                     {
01958                       //output the hexadecimal code
01959                       OUTPUTE("0x");
01960                       OUTPUTE(std::hex << std::setw(2) << std::setfill('0') << (int)rel->value << " ");
01961                     }
01962                 }
01963               else
01964                 {
01965                   OUTPUTE("R" << rel->rule->id << " ");
01966                 }
01967             }
01968         }
01969 
01970       OUTPUTE("\'");
01971       OUTPUTEENDLINE;
01972     }
01973 
01974   OUTPUTE(std::dec);
01975   OUTPUTEENDLINE;
01976 
01977   //   OUTPUTENL("I = " << I);
01978 }
01979 
01980 
01984 void KYGrammar::printDigrams(void)
01985 {
01986   NeighboursDescription *nd;
01987   
01988   OUTPUTENL("** TERMINAL DIGRAMS");
01989   for (long i = 0; i < 9000; i++)
01990     {
01991       nd = terminalDigrams->find(i);
01992       if (nd && nd->neighbours.count())
01993         {
01994           OUTPUTEENDLINE;
01995           OUTPUTENL("DIGRAMS FOR TERMINAL: " << (int)i);
01996           OUTPUTE("             represented by rules: ");
01997           for (Rule *r = nd->representedBy.first(); r; r = nd->representedBy.next())
01998             {
01999               OUTPUTE(r->id << ", ");
02000             }
02001           OUTPUTEENDLINE;
02002           
02003           for (RuleElementNeighbour *r = nd->neighbours.first(); r; r = nd->neighbours.next())
02004             {
02005               if (r->origin->type == Terminal)
02006                 OUTPUTE("  \"" << (int)r->origin->value << "\"");
02007               else
02008                 OUTPUTE("  N" << r->origin->rule->id);
02009 
02010               OUTPUTENL("       [" << r->origin << ":" << r->rule << "]");
02011             }
02012         }
02013     }
02014 
02015   OUTPUTENL("** VARIABLE DIGRAMS");
02016 
02017   for (long i = 0; i < 9000; i++)
02018     {
02019       nd = variableDigrams->find(i);
02020       if (nd && nd->neighbours.count())
02021         {
02022           OUTPUTEENDLINE;
02023           OUTPUTENL("DIGRAMS FOR VARIABLE: " << i);
02024           for (RuleElementNeighbour *r = nd->neighbours.first(); r; r = nd->neighbours.next())
02025             {
02026               if (r->origin->type == Terminal)
02027                 OUTPUTE("  \"" << (int)r->origin->value << "\"");
02028               else
02029                 OUTPUTE("  N" << r->origin->rule->id);
02030 
02031               OUTPUTENL("       [" << r->origin << ":" << r->rule << "]");
02032             }
02033         }
02034     }
02035 
02036   OUTPUTEENDLINE;
02037 }
02038 
02039 
02040 
02047 void KYGrammar::setContext(ContextBase *ctxt, bool useForOutput = true)
02048 {
02049   context = ctxt;
02050   useContextForOutput = useForOutput;
02051 }
02052 

Generated on Wed Feb 5 10:43:02 2003 for Exalt by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002