00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00026 #ifdef __GNUG__
00027 # pragma implementation
00028 #endif
00029
00030
00031 #include "kygrammar.h"
00032
00033
00034
00040 #define DECREASE_INPUT_QUEUE_LENGTH(_amount_) inputQueueLength -= _amount_
00041
00042
00050 #define INCREASE_INPUT_QUEUE_LENGTH(_amount_) \
00051 { \
00052 inputQueueLength += _amount_; \
00053 if (inputQueueLength > maxInputQueueLength) \
00054 maxInputQueueLength = inputQueueLength; \
00055 }
00056
00057
00067 #define TESTEDRULES_POP(_val_, _stack_, _tmpStack_) \
00068 { \
00069 _val_ = _stack_->rel; \
00070 _tmpStack_ = _stack_; \
00071 _stack_ = _stack_->next; \
00072 DELETE(_tmpStack_); \
00073 }
00074
00075
00085 #define TESTEDRULES_PUSH(_val_, _stack_, _tmpStack_) \
00086 { \
00087 NEW(_tmpStack_, TestedElement); \
00088 _tmpStack_->rel = _val_; \
00089 _tmpStack_->next = _stack_; \
00090 _stack_ = _tmpStack_; \
00091 }
00092
00093
00099 #define DIGRAM_REMOVE(_el1_, _el2_) \
00100 { \
00101 if (_el1_->type == Terminal) \
00102 {nd = terminalDigrams->find(_el1_->value);} \
00103 else \
00104 {nd = variableDigrams->find(_el1_->rule->id);} \
00105 \
00106 if (nd) \
00107 for (r = nd->neighbours.first(); r; r = nd->neighbours.next()) \
00108 if (r->origin == _el2_) \
00109 { \
00110 nd->neighbours.remove(); \
00111 DELETE(r); \
00112 break; \
00113 } \
00114 }
00115
00116
00122 #define DIGRAM_ADD(_el1_, _el2_, _rulePtr_) \
00123 { \
00124 bool _needInsert_ = true; \
00125 if (_el1_->type == Terminal) \
00126 { \
00127 if (!(nd = terminalDigrams->find(_el1_->value))) \
00128 { \
00129 NEW(nd, NeighboursDescription); \
00130 terminalDigrams->insert(_el1_->value, nd); \
00131 } \
00132 } \
00133 else \
00134 { \
00135 if (!(nd = variableDigrams->find(_el1_->rule->id))) \
00136 { \
00137 NEW(nd, NeighboursDescription); \
00138 variableDigrams->insert(_el1_->rule->id, nd); \
00139 } \
00140 } \
00141 \
00142 for (r = nd->neighbours.first(); r; r = nd->neighbours.next()) \
00143 if (r->origin->type == _el2_->type && r->origin->rule == _el2_->rule) \
00144 { \
00145 _needInsert_ = false; \
00146 break; \
00147 } \
00148 \
00149 if (_needInsert_) \
00150 { \
00151 NEW(r, RuleElementNeighbour); \
00152 r->origin = _el2_; \
00153 r->rule = _rulePtr_; \
00154 nd->neighbours.append(r); \
00155 } \
00156 }
00157
00158
00159
00165 #define DIGRAM_CHANGE(_el1_, _el2_, _rulePtr_) \
00166 { \
00167 if (_el1_->type == Terminal) \
00168 nd = terminalDigrams->find(_el1_->value); \
00169 else \
00170 nd = variableDigrams->find(_el1_->rule->id); \
00171 \
00172 if (nd) \
00173 for (r = nd->neighbours.first(); r; r = nd->neighbours.next()) \
00174 if (r->origin == _el2_) \
00175 { \
00176 r->rule = _rulePtr_; \
00177 break; \
00178 } \
00179 }
00180
00181
00187 #define RIGHT_SIDE_APPEND(_rule_, _el_) \
00188 if (!_rule_->body) \
00189 { \
00190 _rule_->body = _el_; \
00191 _rule_->last = _el_; \
00192 _el_->next = 0; \
00193 _el_->prev = 0; \
00194 } \
00195 else \
00196 { \
00197 _rule_->last->next = _el_; \
00198 _el_->prev = _rule_->last; \
00199 _el_->next = 0; \
00200 _rule_->last = _el_; \
00201 }
00202
00203
00209 #define RIGHT_SIDE_IS_EMPTY(_rule_) (_rule_->last == 0)
00210
00211
00220 #define REPRESENTED_BY_APPEND(_list_, _rule_) \
00221 { \
00222 bool _needInsert_ = true; \
00223 \
00224 for (Rule *_rr_ = _list_.first(); _rr_; _rr_ = _list_.next()) \
00225 { \
00226 if (_rr_ == _rule_) \
00227 { \
00228 _needInsert_ = false; \
00229 break; \
00230 } \
00231 } \
00232 if (_needInsert_) \
00233 _list_.append(_rule_); \
00234 }
00235
00236
00237
00241 KYGrammar::KYGrammar(void)
00242 : GrammarBase()
00243 {
00244 initKYGrammar();
00245 }
00246
00247
00248
00252 void KYGrammar::initKYGrammar(void)
00253 {
00254 size = 0;
00255 newSymbolToInstall = 0;
00256 inputQueueLength = maxInputQueueLength = 0;
00257
00258 totalTime = totalSymbols = 0;
00259 variablesEncoded = 0;
00260 terminalsEncoded = 0;
00261
00262 eatDataPeriodicity = KY_GRAMMAR_EATDATA_PERIODICITY;
00263
00264 NEW(flushStack, RuleElement*[KY_GRAMMAR_FLUSH_STACK_SIZE]);
00265 flushStackPos = -1;
00266
00267 NEW(ruleStack, Rule*[KY_GRAMMAR_RULE_STACK_SIZE]);
00268 ruleStackPos = -1;
00269
00270 outputDevice = 0;
00271
00272 context = 0;
00273 useContextForOutput = false;
00274
00275 NEW(ruleSet, RuleSet);
00276
00277 ruleCounter = 0;
00278
00279
00280 inputFirst = inputLast = 0;
00281 NEW(testedRules, TestedRules);
00282
00283 NEW(availableRuleNumbers, Queue<RuleId>);
00284
00285 availableRuleNumbers->setAutoDelete(true);
00286
00287 runCount = 1;
00288
00289 rootRule = 0;
00290 createRootRule();
00291
00292 NEW(terminalDigrams, TerminalDigrams);
00293 terminalDigrams->setAutoDelete(true);
00294
00295 NEW(variableDigrams, VariableDigrams);
00296 variableDigrams->setAutoDelete(true);
00297
00298
00299 I = false;
00300 }
00301
00302
00303
00307 KYGrammar::~KYGrammar(void)
00308 {
00309
00310 flush();
00311
00312 DELETE_ARRAY(flushStack);
00313 DELETE_ARRAY(ruleStack);
00314
00315 if (ExaltOptions::getOption(ExaltOptions::PrintGrammar) == ExaltOptions::Yes)
00316 print();
00317
00318 if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00319 {
00320 OUTPUTENL("KY grammar statistics");
00321 OUTPUTE(" Total symbols processed: \t\t");
00322 if (useContextForOutput)
00323 {
00324
00325 OUTPUTENL(totalSymbols);
00326 }
00327 else
00328 {
00329
00330 OUTPUTENL(runCount-1);
00331 }
00332
00333 OUTPUTENL(" Symbols appended to the root rule: \t" << runCount-1);
00334 OUTPUTENL(" Number of rules: \t\t\t" << ruleSet->count());
00335
00336 if (useContextForOutput)
00337 {
00338
00339 OUTPUTENL(" Variables encoded: \t\t\t" << variablesEncoded);
00340 OUTPUTENL(" Terminals encoded: \t\t\t" << terminalsEncoded);
00341 OUTPUTENL(" Maximum length of the input queue: \t" << maxInputQueueLength);
00342 OUTPUTENL(" Average time per symbol: \t\t" << (double)totalTime/totalSymbols << " microsecs");
00343 }
00344
00345 OUTPUTEENDLINE;
00346 }
00347
00348
00349 DELETE(ruleSet);
00350
00351 DELETE(testedRules);
00352 DELETE(availableRuleNumbers);
00353
00354 DELETE(terminalDigrams);
00355 DELETE(variableDigrams);
00356
00357 deleteDefaultTextCodec();
00358 }
00359
00360
00361
00365 void KYGrammar::purge(void)
00366 {
00367 DELETE(flushStack);
00368 DELETE(ruleStack);
00369
00370 DELETE(ruleSet);
00371
00372 DELETE(testedRules);
00373 DELETE(availableRuleNumbers);
00374
00375 DELETE(terminalDigrams);
00376 DELETE(variableDigrams);
00377
00378
00379 NEW(flushStack, RuleElement*[KY_GRAMMAR_FLUSH_STACK_SIZE]);
00380 flushStackPos = -1;
00381
00382 NEW(ruleStack, Rule*[KY_GRAMMAR_RULE_STACK_SIZE]);
00383 ruleStackPos = -1;
00384
00385 NEW(ruleSet, RuleSet);
00386
00387 NEW(testedRules, TestedRules);
00388
00389 NEW(availableRuleNumbers, Queue<RuleId>);
00390
00391 availableRuleNumbers->setAutoDelete(true);
00392
00393 runCount = 1;
00394 ruleCounter = 0;
00395
00396 rootRule = 0;
00397 createRootRule();
00398
00399 NEW(terminalDigrams, TerminalDigrams);
00400 terminalDigrams->setAutoDelete(true);
00401
00402 NEW(variableDigrams, VariableDigrams);
00403 variableDigrams->setAutoDelete(true);
00404
00405
00406 I = false;
00407
00408
00409 size = 0;
00410 }
00411
00412
00413
00419 void KYGrammar::setAlphabetBaseSize(size_t size)
00420 {
00421 if (context)
00422 {
00423 if (!context->isInitialized())
00424 {
00425
00426 context->setType(size, DynamicContext);
00427 context->initialize();
00428
00429
00430 char fibBuf[10];
00431 size_t nrItems = Fibonacci::encodeToBuffer(fibBuf, SIZEOF_CHAR, size);
00432 outputDevice->writeData(fibBuf, nrItems);
00433 }
00434 }
00435
00436
00437 outputDevice = 0;
00438 }
00439
00440
00441
00447 void KYGrammar::setOutputDevice(IODevice *device)
00448 {
00449 outputDevice = device;
00450 }
00451
00452
00458 Rule *KYGrammar::createRootRule(void)
00459 {
00460 if (rootRule)
00461 {
00462 WRN("Root roole already exists --> cannot create root rule.");
00463 return 0;
00464 }
00465 else
00466 {
00467 NEW(rootRule, Rule);
00468
00469 rootRule->counter = 1;
00470
00471 if (!rootRule)
00472 return 0;
00473
00474 rootRule->refCount = 1;
00475 rootRule->id = ruleCounter;
00476 rootRule->body = 0;
00477 rootRule->last = 0;
00478
00479 ruleCounter++;
00480
00481 ruleSet->append(rootRule);
00482
00483 return rootRule;
00484 }
00485 }
00486
00487
00488
00497 Rule *KYGrammar::createRule(RuleElement *start, RuleElement *last)
00498 {
00499 Rule *rule;
00500
00501 NEW(rule, Rule);
00502
00503
00504 rule->counter = 1;
00505
00506 rule->matchRun = 0;
00507 rule->matchResult = Ignore;
00508
00509 rule->body = start;
00510 rule->last = last;
00511
00512 rule->refCount = 0;
00513
00514
00515 if (availableRuleNumbers->isEmpty())
00516 {
00517
00518 rule->id = ruleCounter;
00519
00520
00521 newSymbolToInstall = context->lastFixedSymbol() + ruleCounter;
00522
00523 DBG("Installing symbol: " << (long)(context->lastFixedSymbol() + ruleCounter) << "(" << context->lastFixedSymbol() << "," << ruleCounter << ")");
00524 ruleCounter++;
00525 }
00526 else
00527 {
00528
00529 RuleId *rid;
00530
00531 rid = availableRuleNumbers->dequeue();
00532
00533 rule->id = *rid;
00534
00535 DELETE(rid);
00536 }
00537
00538 ruleSet->append(rule);
00539
00540 return rule;
00541 }
00542
00543
00544
00552 Rule *KYGrammar::findRule(RuleId id)
00553 {
00554 for (Rule *r = ruleSet->first(); r; r = ruleSet->next())
00555 {
00556 if (r->id == id)
00557 return r;
00558 }
00559
00560 return 0;
00561 }
00562
00563
00564
00574 bool KYGrammar::ruleMatchesInput(TestedRule *testedRule, bool flush = false)
00575 {
00576 RuleElement *rel;
00577 unsigned int length;
00578 InputItem *i, *ii;
00579
00580 if (testedRule->rule->matchResult == DoesntMatch && testedRule->rule->matchRun == runCount)
00581 return false;
00582
00583 testedRule->rule->matchRun = runCount;
00584
00585 if (!flush)
00586 {
00587 TestedElement *stack = testedRule->currentElements;
00588 TestedElement *s;
00589
00590
00591 if (!stack)
00592 {
00593
00594 rel = testedRule->rule->body;
00595 CHECK_POINTER(rel);
00596 length = 0;
00597 i = inputFirst;
00598 }
00599 else
00600 {
00601 TESTEDRULES_POP(rel, stack, s);
00602 length = testedRule->matchedLength;
00603 i = testedRule->lastInputItem->next;
00604 }
00605
00606
00607 ii = i;
00608
00609 while (i)
00610 {
00611 if (rel->type == Terminal)
00612 {
00613
00614
00615 if (rel->value != i->value)
00616 {
00617
00618 testedRule->rule->matchResult = DoesntMatch;
00619 testedRule->rule->matchRun = runCount;
00620
00621 while (stack)
00622 {
00623 TESTEDRULES_POP(rel, stack, s);
00624
00625 if (length == 1)
00626 {
00627 rel->rule->matchRun = runCount;
00628 rel->rule->matchResult = DoesntMatch;
00629 }
00630 }
00631 testedRule->currentElements = 0;
00632 return false;
00633 }
00634 else
00635 {
00636
00637 length++;
00638 rel = rel->next;
00639
00640 ii = i;
00641 i = i->next;
00642
00643 if (!rel)
00644 {
00645
00646
00647 while (stack && !rel)
00648 {
00649
00650 TESTEDRULES_POP(rel, stack, s);
00651
00652 rel = rel->next;
00653 }
00654
00655 if (!stack && !rel)
00656 {
00657
00658
00659 testedRule->currentElements = 0;
00660 testedRule->matchedLength = length;
00661 testedRule->matchesCompletely = true;
00662 testedRule->rule->matchResult = Matches;
00663 return true;
00664 }
00665 }
00666 }
00667 }
00668 else
00669 {
00670
00671 TESTEDRULES_PUSH(rel, stack, s);
00672
00673 rel = rel->rule->body;
00674 }
00675 }
00676
00677
00678
00679 TESTEDRULES_PUSH(rel, stack, s);
00680 testedRule->currentElements = stack;
00681 testedRule->matchesCompletely = false;
00682 testedRule->matchedLength = length;
00683 testedRule->lastInputItem = ii;
00684 testedRule->rule->matchResult = Matches;
00685
00686 return true;
00687 }
00688 else
00689 {
00690 flushStackPos = -1;
00691
00692 rel = testedRule->rule->body;
00693 length = 0;
00694 i = inputFirst;
00695
00696
00697 ii = i;
00698
00699 while (i)
00700 {
00701 if (rel->type == Terminal)
00702 {
00703
00704
00705 if (rel->value != i->value)
00706 {
00707 testedRule->rule->matchRun = runCount;
00708 testedRule->rule->matchResult = DoesntMatch;
00709
00710 while (flushStackPos != -1)
00711 {
00712 rel = flushStack[flushStackPos];
00713 flushStackPos--;
00714
00715 if (length == 1)
00716 {
00717 rel->rule->matchRun = runCount;
00718 rel->rule->matchResult = DoesntMatch;
00719 }
00720 }
00721 return false;
00722 }
00723 else
00724 {
00725
00726 length++;
00727 rel = rel->next;
00728
00729 ii = i;
00730 i = i->next;
00731
00732 if (!rel)
00733 {
00734
00735
00736 while (flushStackPos != -1 && !rel)
00737 {
00738
00739 rel = flushStack[flushStackPos]->next;
00740 flushStackPos--;
00741 }
00742
00743 if (flushStackPos == -1 && !rel)
00744 {
00745
00746
00747 testedRule->matchedLength = length;
00748 testedRule->matchesCompletely = true;
00749 testedRule->rule->matchRun = runCount;
00750 testedRule->rule->matchResult = Matches;
00751 return true;
00752 }
00753 }
00754 }
00755 }
00756 else
00757 {
00758
00759 flushStackPos++;
00760 if (flushStackPos == KY_GRAMMAR_FLUSH_STACK_SIZE)
00761 {
00762 FATAL("Size of KY flush stack exceeded! Please redefine the KY_GRAMMAR_FLUSH_STACK_SIZE constant in kygrammar.h");
00763 }
00764 flushStack[flushStackPos] = rel;
00765 rel = rel->rule->body;
00766 }
00767 }
00768
00769 testedRule->rule->matchRun = runCount;
00770 testedRule->rule->matchResult = DoesntMatch;
00771
00772 return false;
00773 }
00774 }
00775
00776
00777
00778 void KYGrammar::eatData(bool forced)
00779 {
00780 TestedRule testedRule;
00781
00782 if (forced)
00783 {
00784
00785 bestTestedRule.rule = 0;
00786 bestTestedRule.length = 1;
00787
00788
00789 for (Rule *r = ruleSet->first(); r; r = ruleSet->next())
00790 {
00791
00792 if (r == rootRule)
00793 continue;
00794
00795 if (r->matchRun == runCount && r->matchResult == DoesntMatch)
00796 continue;
00797
00798 if (r->body->type == Terminal && r->body->value != inputFirst->value)
00799 {
00800 r->matchResult = DoesntMatch;
00801 r->matchRun = runCount;
00802 continue;
00803 }
00804
00805
00806 testedRule.rule = r;
00807 testedRule.currentElements = 0;
00808 if (ruleMatchesInput(&testedRule, true))
00809 {
00810
00811 if (testedRule.matchesCompletely &&
00812 testedRule.matchedLength > bestTestedRule.length)
00813 {
00814
00815 bestTestedRule.rule = r;
00816 bestTestedRule.length = testedRule.matchedLength;
00817
00818 }
00819 }
00820 }
00821
00822 RuleElement *rel;
00823 NEW(rel, RuleElement);
00824
00825 if (bestTestedRule.rule)
00826
00827 {
00828 rel->type = Variable;
00829 rel->value = 0;
00830 rel->rule = 0;
00831 rel->rule = bestTestedRule.rule;
00832
00833 rel->rule->refCount++;
00834
00835
00836 InputItem *ii;
00837 for (size_t i = 0 ; i < bestTestedRule.length; i++)
00838 {
00839 ii = inputFirst->next;
00840 DELETE(inputFirst);
00841 inputFirst = ii;
00842 }
00843 DECREASE_INPUT_QUEUE_LENGTH(bestTestedRule.length);
00844
00845 if (!inputFirst)
00846
00847 inputLast = 0;
00848 }
00849 else
00850
00851 {
00852 rel->type = Terminal;
00853 rel->value = 0;
00854 rel->rule = 0;
00855 rel->value = inputFirst->value;
00856
00857 InputItem *ii = inputFirst->next;
00858 DELETE(inputFirst);
00859 inputFirst = ii;
00860 DECREASE_INPUT_QUEUE_LENGTH(1);
00861 if (!inputFirst)
00862
00863 inputLast = 0;
00864 }
00865
00866 appendToRootRule(rel);
00867 }
00868 else
00869 {
00870 bool canEnd = false;
00871 NeighboursDescription *nd;
00872 RuleElementNeighbour *ren;
00873 Rule *r, *rr;
00874
00875 while (!canEnd)
00876 {
00877 if (!inputFirst)
00878 return;
00879
00880 if (testedRules->isEmpty())
00881 {
00882
00883
00884
00885 bestTestedRule.rule = 0;
00886 bestTestedRule.length = 1;
00887
00888
00889
00890
00891 nd = terminalDigrams->find(inputFirst->value);
00892
00893 if (nd)
00894 {
00895
00896 ruleStackPos = -1;
00897
00898 for (ren = nd->neighbours.first(); ren; ren = nd->neighbours.next())
00899 {
00900
00901
00902
00903
00904 if (ren->origin == ren->rule->body->next)
00905 {
00906 if (ren->rule->matchRun != runCount)
00907 {
00908 ruleStackPos++;
00909 if (ruleStackPos == KY_GRAMMAR_RULE_STACK_SIZE)
00910 {
00911 FATAL("Size of the rule stack exceeded!");
00912 }
00913 ruleStack[ruleStackPos] = ren->rule;
00914 for (rr = nd->representedBy.first(); rr; rr = nd->representedBy.next())
00915 {
00916 if (rr->matchRun != runCount)
00917 {
00918
00919 ruleStackPos++;
00920 if (ruleStackPos == KY_GRAMMAR_RULE_STACK_SIZE)
00921 {
00922 FATAL("Size of the rule stack exceeded!");
00923 }
00924
00925 ruleStack[ruleStackPos] = rr;
00926
00927 }
00928 }
00929 }
00930 }
00931 else
00932 continue;
00933
00934 while (ruleStackPos > -1)
00935 {
00936 r = ruleStack[ruleStackPos];
00937 ruleStackPos--;
00938
00939
00940 if (r == rootRule)
00941 continue;
00942
00943
00944 if (r->matchRun == runCount)
00945 {
00946
00947 continue;
00948 }
00949
00950
00951 testedRule.rule = r;
00952 testedRule.currentElements = 0;
00953 if (ruleMatchesInput(&testedRule))
00954 {
00955
00956 if (testedRule.matchesCompletely)
00957 {
00958
00959
00960 if (testedRule.matchedLength > bestTestedRule.length)
00961 {
00962
00963 bestTestedRule.rule = r;
00964 bestTestedRule.length = testedRule.matchedLength;
00965
00966 }
00967 }
00968 else
00969 {
00970
00971
00972
00973 TestedRule *tr;
00974
00975 NEW(tr, TestedRule);
00976 tr->rule = testedRule.rule;
00977 tr->matchedLength = testedRule.matchedLength;
00978 tr->matchesCompletely = testedRule.matchesCompletely;
00979 tr->lastInputItem = testedRule.lastInputItem;
00980 tr->currentElements = testedRule.currentElements;
00981
00982 testedRules->append(tr);
00983 }
00984 }
00985 }
00986 }
00987 }
00988 }
00989 else
00990 {
00991 TestedRule *tr = testedRules->first();
00992 while (tr)
00993 {
00994
00995 if (ruleMatchesInput(tr))
00996 {
00997
00998 if (tr->matchesCompletely)
00999 {
01000
01001
01002 if (tr->matchedLength > bestTestedRule.length && tr->matchedLength > 1)
01003 {
01004
01005 bestTestedRule.rule = tr->rule;
01006 bestTestedRule.length = tr->matchedLength;
01007 testedRules->remove();
01008 tr = testedRules->current();
01009 continue;
01010 }
01011 else
01012 {
01013
01014 testedRules->remove();
01015 tr = testedRules->current();
01016 continue;
01017 }
01018 }
01019 else
01020 {
01021 canEnd = true;
01022
01023
01024
01025 }
01026 }
01027 else
01028 {
01029
01030 testedRules->remove();
01031 tr = testedRules->current();
01032 continue;
01033 }
01034
01035 tr = testedRules->next();
01036
01037 }
01038
01039 }
01040
01041
01042
01043 if (testedRules->isEmpty())
01044 {
01045 RuleElement *rel;
01046 NEW(rel, RuleElement);
01047
01048 if (bestTestedRule.rule)
01049
01050 {
01051 rel->type = Variable;
01052 rel->value = 0;
01053 rel->rule = 0;
01054 rel->rule = bestTestedRule.rule;
01055
01056 rel->rule->refCount++;
01057
01058
01059 InputItem *ii;
01060 for (size_t i = 0 ; i < bestTestedRule.length; i++)
01061 {
01062 ii = inputFirst->next;
01063 DELETE(inputFirst);
01064 inputFirst = ii;
01065 }
01066 DECREASE_INPUT_QUEUE_LENGTH(bestTestedRule.length);
01067
01068
01069 if (!inputFirst)
01070
01071 inputLast = 0;
01072
01073 }
01074 else
01075
01076 {
01077 rel->type = Terminal;
01078 rel->value = 0;
01079 rel->rule = 0;
01080 rel->value = inputFirst->value;
01081
01082 InputItem *ii = inputFirst->next;
01083 DELETE(inputFirst);
01084 inputFirst = ii;
01085 DECREASE_INPUT_QUEUE_LENGTH(1);
01086
01087 if (!inputFirst)
01088
01089 inputLast = 0;
01090 }
01091
01092
01093 appendToRootRule(rel);
01094 }
01095 else
01096
01097 canEnd = true;
01098 }
01099 }
01100 }
01101
01102
01104
01105
01106
01107
01115 void KYGrammar::append(TerminalValue value)
01116 {
01117 InputItem *i;
01118
01119 NEW(i, InputItem);
01120
01121 i->next = 0;
01122 i->value = value;
01123
01124 if (!inputFirst)
01125 inputFirst = i;
01126 else
01127 inputLast->next = i;
01128
01129 inputLast = i;
01130
01131 INCREASE_INPUT_QUEUE_LENGTH(1);
01132 eatDataPeriodicity--;
01133
01134 totalSymbols++;
01135
01136
01137 if (!eatDataPeriodicity)
01138 {
01139 struct timeval t1, t2, t3;
01140 timerclear(&t1);
01141 timerclear(&t2);
01142 gettimeofday(&t1, 0);
01143
01144 eatData();
01145
01146 gettimeofday(&t2, 0);
01147 timersub(&t2, &t1, &t3);
01148 totalTime+=t3.tv_usec;
01149 eatDataPeriodicity = KY_GRAMMAR_EATDATA_PERIODICITY;
01150 }
01151 }
01152
01153
01154
01160 void KYGrammar::flush(void)
01161 {
01162 DBG("Flushing the grammar...");
01163 while (inputFirst)
01164 {
01165 eatData(true);
01166 }
01167 }
01168
01169
01170
01180 void KYGrammar::appendToRootRule(RuleElement *newElement)
01181 {
01182 RuleElement *last;
01183 NeighboursDescription *nd;
01184 RuleElementNeighbour *r;
01185 Rule *newRule;
01186 bool collision;
01187
01188
01189 if (!textCodec)
01190
01191 createDefaultTextCodec();
01192
01193 if (context)
01194 {
01195 if (!context->isInitialized())
01196 {
01197
01198
01199
01200 setAlphabetBaseSize(textCodec->suggestAlphabetBaseSize(Encodings::UTF_8));
01201 }
01202 }
01203
01204
01205
01206 if (outputDevice)
01207 {
01208 if (newElement->type == Terminal)
01209 {
01210 outputDevice->writeData((const char *)&newElement->value, SIZEOF_XML_CHAR);
01211 }
01212 else
01213 reconstructRule(newElement->rule, outputDevice);
01214 }
01215
01216
01217 runCount++;
01218
01219
01220 size++;
01221
01222 RuleElement appendedElement;
01223 appendedElement = *newElement;
01224
01225
01226 last = rootRule->last;
01227 RIGHT_SIDE_APPEND(rootRule, newElement);
01228
01229 if (last)
01230
01231 {
01232 if (last->type == Terminal)
01233 nd = terminalDigrams->find(last->value);
01234 else
01235 nd = variableDigrams->find(last->rule->id);
01236
01237 if (nd)
01238
01239 {
01240
01241
01242 collision = false;
01243
01244
01245 for (RuleElementNeighbour *n = nd->neighbours.first(); n; n = nd->neighbours.next())
01246 {
01247 if ((n->origin->type == Terminal && newElement->type == Terminal && n->origin->value == newElement->value)
01248 || (n->origin->type == Variable && newElement->type == Variable && n->origin->rule == newElement->rule))
01249
01250 {
01251 if (n->origin == last)
01252 {
01253
01254 collision = true;
01255
01256
01257 I = false;
01258 break;
01259 }
01260 else
01261 {
01262
01263
01264 if (n->rule == rootRule)
01265
01266 {
01267
01268
01269 if (I == false)
01270
01271
01272 {
01273 reductionRule2(n->origin->prev, last);
01274 I = true;
01275 }
01276 else
01277
01278
01279 {
01280 newRule = reductionRule2(n->origin->prev, last);
01281 reductionRule1(newRule);
01282 }
01283 }
01284 else
01285
01286 {
01287
01288
01289
01290 if (n->origin->prev->prev || n->origin->next)
01291 {
01292
01293
01294
01295 if (I == false)
01296
01297
01298 {
01299 reductionRule3(n->rule, n->origin->prev, last);
01300 I = true;
01301 }
01302 else
01303
01304
01305 {
01306 newRule = reductionRule3(n->rule, n->origin->prev, last);
01307 reductionRule1(newRule);
01308 }
01309 }
01310 else
01311 I = false;
01312 }
01313
01314
01315 collision = true;
01316 break;
01317 }
01318 }
01319 }
01320
01321 if (!collision)
01322
01323 {
01324
01325 NEW(r, RuleElementNeighbour);
01326 r->origin = newElement;
01327
01328 r->rule = rootRule;
01329 nd->neighbours.append(r);
01330
01331
01332 I = false;
01333
01334 }
01335 }
01336 else
01337
01338 {
01339
01340 DIGRAM_ADD(last, newElement, rootRule);
01341
01342
01343 I = false;
01344 }
01345 }
01346
01347
01348
01349 #ifdef SEQUENTIAL_ALGORITHM
01350 if (useContextForOutput)
01351 {
01352 if (context)
01353 {
01354 if (appendedElement.type == Terminal)
01355 {
01356
01357
01358
01359 if (context->encode((int)(unsigned char)appendedElement.value) == ContextBase::NotKnown)
01360 {
01361 FATAL("encoding unknown terminal: " << (unsigned char)appendedElement.value);
01362 }
01363
01364 terminalsEncoded++;
01365 }
01366 else
01367 {
01368
01369 long symbolId = context->lastFixedSymbol() + appendedElement.rule->id;
01370
01371
01372
01373 if (context->encode(symbolId) == ContextBase::NotKnown)
01374 {
01375 FATAL("Encoding unknown symbol: " << symbolId);
01376 }
01377
01378 variablesEncoded++;
01379 }
01380 }
01381 }
01382
01383
01384
01385
01386
01387 if (newSymbolToInstall)
01388 {
01389 context->installSymbol(newSymbolToInstall);
01390 newSymbolToInstall = 0;
01391 }
01392
01393
01394 if (size == KY_GRAMMAR_MAX_SIZE)
01395 {
01396 DBG("PURGING THE GRAMMAR");
01397 purge();
01398 }
01399 #endif
01400
01401 }
01402
01403
01404
01410 void KYGrammar::reductionRule1(Rule *rule)
01411 {
01412 RuleElement *rel;
01413 RuleElement *origRuleEl;
01414 Rule *origRule;
01415 NeighboursDescription *nd;
01416 RuleElementNeighbour *r;
01417
01418 CHECK_POINTER(rule);
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430 rel = rule->body;
01431
01432 if (rel->type == Terminal)
01433 {
01434 FATAL("First element is not a variable!");
01435 }
01436
01437
01438 origRule = rel->rule;
01439
01440 RuleId *rid;
01441
01442 NEW(rid, RuleId(rel->rule->id));
01443
01444 availableRuleNumbers->enqueue(rid);
01445
01446
01447
01448 origRuleEl = origRule->body;
01449 while (origRuleEl->next)
01450 {
01451 DIGRAM_CHANGE(origRuleEl, origRuleEl->next, rule);
01452 origRuleEl = origRuleEl->next;
01453 }
01454
01455
01456
01457 origRuleEl->next = rel->next;
01458
01459 if (rel->next)
01460 {
01461 rel->next->prev = origRuleEl;
01462
01463
01464 DIGRAM_ADD(origRuleEl, rel->next, rule);
01465
01466
01467 DIGRAM_REMOVE(rel, rel->next);
01468 }
01469
01470 rule->body = origRule->body;
01471
01472
01473
01474
01475
01476
01477 NeighboursDescription *ndRep;
01478 RuleElement *relRep = rel->rule->body;
01479
01480 if (relRep->type == Terminal)
01481 ndRep = terminalDigrams->find(relRep->value);
01482 else
01483 ndRep = variableDigrams->find(relRep->rule->id);
01484
01485 CHECK_POINTER(ndRep);
01486 if (ndRep)
01487 {
01488
01489 if (!ndRep->representedBy.remove(rule))
01490 {
01491
01492 }
01493 }
01494
01495
01496
01497 if (origRule->body->type == Variable)
01498 {
01499 relRep = origRule->body;
01500
01501 while (relRep->type != Terminal)
01502 relRep = relRep->rule->body;
01503
01504 ndRep = terminalDigrams->find(relRep->value);
01505
01506 CHECK_POINTER(ndRep);
01507 if (ndRep)
01508 {
01509
01510 while (ndRep->representedBy.remove(origRule));
01511
01512
01513 REPRESENTED_BY_APPEND(ndRep->representedBy, rule);
01514
01515 }
01516 }
01517
01518
01519 DELETE(rel);
01520
01521
01522 ruleSet->remove(origRule);
01523 DELETE(origRule);
01524
01525
01526
01527 size--;
01528
01529 }
01530
01531
01532
01541 Rule *KYGrammar::reductionRule2(RuleElement *elOrig1, RuleElement *elNew1)
01542
01543 {
01544 RuleElement *elOrig2;
01545 RuleElement *elNew2;
01546 RuleElement *var1, *var2;
01547 Rule *newRule;
01548 NeighboursDescription *nd;
01549 RuleElementNeighbour *r;
01550
01551
01552
01553 CHECK_POINTER(elOrig1);
01554 CHECK_POINTER(elNew1);
01555
01556 elOrig2 = elOrig1->next;
01557 CHECK_POINTER(elOrig2);
01558
01559
01560
01561 if (elOrig1->type == Variable)
01562 elOrig1->rule->refCount--;
01563
01564 if (elOrig2->type == Variable)
01565 elOrig2->rule->refCount--;
01566
01567
01568 NEW(var1, RuleElement);
01569 var1->type = Variable;
01570 var1->value = 0;
01571 var1->rule = 0;
01572 var1->next = elOrig2->next;
01573
01574
01575 elOrig2->next->prev = var1;
01576 var1->prev = elOrig1->prev;
01577
01578
01579 DIGRAM_REMOVE(elOrig2, var1->next);
01580
01581
01582
01583 elNew2 = elNew1->next;
01584 CHECK_POINTER(elNew2);
01585
01586
01587 NEW(var2, RuleElement);
01588 var2->type = Variable;
01589 var2->value = 0;
01590 var2->rule = 0;
01591 var2->next = 0;
01592 var2->prev = elNew1->prev;
01593 elNew1->prev->next = var2;
01594
01595
01596 rootRule->last = var2;
01597
01598 if (!elOrig1->prev)
01599
01600
01601 rootRule->body = var1;
01602 else
01603 elOrig1->prev->next = var1;
01604
01605
01606 elOrig1->prev = 0;
01607 elOrig2->next = 0;
01608
01609
01610
01611
01612
01613 newRule = createRule(elOrig1, elOrig2);
01614
01615
01616 DIGRAM_CHANGE(elOrig1, elOrig2, newRule);
01617
01618
01619 var1->rule = newRule;
01620 var2->rule = newRule;
01621
01622
01623 newRule->refCount = 2;
01624
01625
01626
01627 DIGRAM_REMOVE(elNew1->prev, elNew1);
01628
01629 if (var1->prev)
01630 DIGRAM_REMOVE(var1->prev, elOrig1);
01631
01632 DIGRAM_ADD(var1, var1->next, rootRule);
01633 DIGRAM_ADD(var2->prev, var2, rootRule);
01634
01635
01636
01637 DELETE(elNew1);
01638 DELETE(elNew2);
01639
01640
01641 RuleElement *relRep;
01642 NeighboursDescription *ndRep;
01643
01644
01645
01646
01647 if (elOrig1->type == Variable)
01648 {
01649 relRep = elOrig1;
01650
01651
01652 while (relRep->type != Terminal)
01653 relRep = relRep->rule->body;
01654
01655 ndRep = terminalDigrams->find(relRep->value);
01656
01657 CHECK_POINTER(ndRep);
01658 if (ndRep)
01659 {
01660
01661 REPRESENTED_BY_APPEND(ndRep->representedBy, newRule);
01662
01663 }
01664
01665 }
01666
01667
01668
01669
01670
01671 return newRule;
01672 }
01673
01674
01675
01685 Rule *KYGrammar::reductionRule3(Rule *ruleOrig, RuleElement *elOrig1, RuleElement *elNew1)
01686 {
01687 RuleElement *elOrig2;
01688 RuleElement *elNew2;
01689 RuleElement *var1, *var2;
01690 Rule *newRule;
01691 NeighboursDescription *nd;
01692 RuleElementNeighbour *r;
01693
01694
01695
01696 CHECK_POINTER(ruleOrig);
01697 CHECK_POINTER(elOrig1);
01698 CHECK_POINTER(elNew1);
01699
01700 elOrig2 = elOrig1->next;
01701 CHECK_POINTER(elOrig2);
01702
01703
01704
01705 if (elOrig1->type == Variable)
01706 elOrig1->rule->refCount--;
01707
01708 if (elOrig2->type == Variable)
01709 elOrig2->rule->refCount--;
01710
01711
01712 NEW(var1, RuleElement);
01713 var1->type = Variable;
01714 var1->value = 0;
01715 var1->rule = 0;
01716
01717 var1->next = elOrig2->next;
01718 if (elOrig2->next)
01719 {
01720
01721
01722 DIGRAM_REMOVE(elOrig2, var1->next);
01723 elOrig2->next->prev = var1;
01724 }
01725 else
01726
01727 ruleOrig->last = var1;
01728
01729
01730 var1->prev = elOrig1->prev;
01731 if (!elOrig1->prev)
01732
01733
01734 ruleOrig->body = var1;
01735 else
01736 elOrig1->prev->next = var1;
01737
01738
01739 elNew2 = elNew1->next;
01740 CHECK_POINTER(elNew2);
01741
01742
01743 NEW(var2, RuleElement);
01744 var2->type = Variable;
01745 var2->value = 0;
01746 var2->rule = 0;
01747 var2->next = 0;
01748 var2->prev = elNew1->prev;
01749
01750 if (elNew1->prev)
01751 elNew1->prev->next = var2;
01752 else
01753
01754 rootRule->body = var2;
01755
01756
01757 rootRule->last = var2;
01758
01759
01760
01761 elOrig1->prev = 0;
01762 elOrig2->next = 0;
01763
01764
01765
01766 newRule = createRule(elOrig1, elOrig2);
01767
01768
01769 DIGRAM_CHANGE(elOrig1, elOrig2, newRule);
01770
01771
01772 var1->rule = newRule;
01773 var2->rule = newRule;
01774
01775
01776 newRule->refCount = 2;
01777
01778
01779
01780
01781 DIGRAM_REMOVE(elNew1->prev, elNew1);
01782 if (var1->prev)
01783 {
01784
01785 DIGRAM_REMOVE(var1->prev, elOrig1);
01786 }
01787
01788 if (var1->next)
01789 DIGRAM_ADD(var1, var1->next, ruleOrig);
01790
01791 if (var1->prev)
01792 DIGRAM_ADD(var1->prev, var1, ruleOrig);
01793
01794 DIGRAM_ADD(var2->prev, var2, rootRule);
01795
01796
01797
01798 DELETE(elNew1);
01799 DELETE(elNew2);
01800
01801
01802 NeighboursDescription *ndRep;
01803 RuleElement *relRep;
01804
01805
01806
01807 if (!var1->prev)
01808 {
01809 relRep = var1;
01810
01811 while (relRep->type != Terminal)
01812 relRep = relRep->rule->body;
01813
01814 ndRep = terminalDigrams->find(relRep->value);
01815
01816 if (ndRep)
01817 {
01818 REPRESENTED_BY_APPEND(ndRep->representedBy, ruleOrig);
01819 }
01820 }
01821
01822
01823
01824 if (elOrig1->type == Variable)
01825 {
01826 if (var1->prev)
01827 {
01828 relRep = elOrig1;
01829
01830 while (relRep->type != Terminal)
01831 relRep = relRep->rule->body;
01832
01833 ndRep = terminalDigrams->find(relRep->value);
01834
01835 }
01836
01837 CHECK_POINTER(ndRep);
01838 if (ndRep)
01839 {
01840
01841
01842 REPRESENTED_BY_APPEND(ndRep->representedBy, newRule);
01843 }
01844 }
01845
01846
01847
01848
01849
01850 return newRule;
01851 }
01852
01853
01854
01860 void KYGrammar::reconstructInput(void)
01861 {
01862
01863 reconstructRule(rootRule, outputDevice);
01864
01865 }
01866
01867
01868
01877 void KYGrammar::reconstructRule(Rule *rule, IODevice *outputDevice)
01878 {
01879 RuleElement *rel;
01880
01881 rel = rule->body;
01882
01883 while (rel)
01884 {
01885 if (rel->type == Terminal)
01886 {
01887 if (!outputDevice)
01888 OUTPUT((char)rel->value);
01889 else
01890 {
01891 outputDevice->writeData((const char *)&rel->value, SIZEOF_XML_CHAR);
01892 }
01893 }
01894 else
01895 reconstructRule(rel->rule, outputDevice);
01896
01897 rel = rel->next;
01898 }
01899 }
01900
01901
01902
01903 size_t KYGrammar::getRuleLength(Rule *rule)
01904 {
01905 RuleElement *rel;
01906 size_t size = 0;
01907
01908 rel = rule->body;
01909
01910 while (rel)
01911 {
01912 if (rel->type == Terminal)
01913 size++;
01914 else
01915 size = size + getRuleLength(rel->rule);
01916
01917 rel = rel->next;
01918 }
01919
01920 return size;
01921 }
01922
01923
01924
01928 void KYGrammar::print(void)
01929 {
01930 Rule *rule;
01931 RuleElement *rel;
01932
01933 OUTPUTENL("KY grammar content");
01934
01935
01936
01937 for (rule = ruleSet->first(); rule; rule = ruleSet->next())
01938 {
01939 OUTPUTE(" R" << std::dec << rule->id << " (length: " << getRuleLength(rule) << ", used: " << rule->refCount << "x) ---> \'");
01940
01941 if (RIGHT_SIDE_IS_EMPTY(rule))
01942 {
01943 OUTPUTE("empty");
01944 }
01945 else
01946 {
01947 for (rel = rule->body; rel; rel = rel->next)
01948 {
01949 if (rel->type == Terminal)
01950 {
01951 if ((int)rel->value > 31)
01952 {
01953
01954 OUTPUTE((char)rel->value << " ");
01955 }
01956 else
01957 {
01958
01959 OUTPUTE("0x");
01960 OUTPUTE(std::hex << std::setw(2) << std::setfill('0') << (int)rel->value << " ");
01961 }
01962 }
01963 else
01964 {
01965 OUTPUTE("R" << rel->rule->id << " ");
01966 }
01967 }
01968 }
01969
01970 OUTPUTE("\'");
01971 OUTPUTEENDLINE;
01972 }
01973
01974 OUTPUTE(std::dec);
01975 OUTPUTEENDLINE;
01976
01977
01978 }
01979
01980
01984 void KYGrammar::printDigrams(void)
01985 {
01986 NeighboursDescription *nd;
01987
01988 OUTPUTENL("** TERMINAL DIGRAMS");
01989 for (long i = 0; i < 9000; i++)
01990 {
01991 nd = terminalDigrams->find(i);
01992 if (nd && nd->neighbours.count())
01993 {
01994 OUTPUTEENDLINE;
01995 OUTPUTENL("DIGRAMS FOR TERMINAL: " << (int)i);
01996 OUTPUTE(" represented by rules: ");
01997 for (Rule *r = nd->representedBy.first(); r; r = nd->representedBy.next())
01998 {
01999 OUTPUTE(r->id << ", ");
02000 }
02001 OUTPUTEENDLINE;
02002
02003 for (RuleElementNeighbour *r = nd->neighbours.first(); r; r = nd->neighbours.next())
02004 {
02005 if (r->origin->type == Terminal)
02006 OUTPUTE(" \"" << (int)r->origin->value << "\"");
02007 else
02008 OUTPUTE(" N" << r->origin->rule->id);
02009
02010 OUTPUTENL(" [" << r->origin << ":" << r->rule << "]");
02011 }
02012 }
02013 }
02014
02015 OUTPUTENL("** VARIABLE DIGRAMS");
02016
02017 for (long i = 0; i < 9000; i++)
02018 {
02019 nd = variableDigrams->find(i);
02020 if (nd && nd->neighbours.count())
02021 {
02022 OUTPUTEENDLINE;
02023 OUTPUTENL("DIGRAMS FOR VARIABLE: " << i);
02024 for (RuleElementNeighbour *r = nd->neighbours.first(); r; r = nd->neighbours.next())
02025 {
02026 if (r->origin->type == Terminal)
02027 OUTPUTE(" \"" << (int)r->origin->value << "\"");
02028 else
02029 OUTPUTE(" N" << r->origin->rule->id);
02030
02031 OUTPUTENL(" [" << r->origin << ":" << r->rule << "]");
02032 }
02033 }
02034 }
02035
02036 OUTPUTEENDLINE;
02037 }
02038
02039
02040
02047 void KYGrammar::setContext(ContextBase *ctxt, bool useForOutput = true)
02048 {
02049 context = ctxt;
02050 useContextForOutput = useForOutput;
02051 }
02052