00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030
00031
00032
00033 #include "elementmodel.h"
00034
00035
00042 #define INC_TRANSITION_FREQUENCY(_node_, _tr_) \
00043 { \
00044 _tr_->frequency++; \
00045 ElementModel::Node *_tmpnode_ = _node_; \
00046 \
00047 if (_tmpnode_->mpt == _tr_) \
00048 { \
00049 } \
00050 else \
00051 if (!_tmpnode_->mpt) \
00052 { \
00053 _tmpnode_->mpt = _tr_; \
00054 } \
00055 else \
00056 if (_tr_->frequency == _tmpnode_->mpt->frequency) \
00057 { \
00058 _tmpnode_->mpt = _tr_; \
00059 } \
00060 else \
00061 if (_tr_->frequency > _tmpnode_->mpt->frequency)\
00062 { \
00063 _tmpnode_->mpt = _tr_; \
00064 } \
00065 }
00066
00067
00068
00075 #define CREATE_GRAPH_TRANSITION(_from_, _to_) \
00076 { \
00077 ElementModel::Transition *_tr_; \
00078 \
00079 NEW(_tr_, ElementModel::Transition); \
00080 _tr_->frequency = 0; \
00081 _tr_->id = ((ElementModel::Node *)_from_)->successors.count() + 1; \
00082 INC_TRANSITION_FREQUENCY(_from_, _tr_); \
00083 _tr_->node = _to_; \
00084 \
00085 ((ElementModel::Node *)_from_)->successors.append(_tr_); \
00086 }
00087
00088
00089
00098 #define CREATE_GRAPH_NODE(_pred_, _node_, _type_, _modeler_) \
00099 { \
00100 if (_type_ == ElementModel::StartNode) \
00101 { \
00102 ElementModel::AttributeNode *_tmpn_; \
00103 NEW(_tmpn_, ElementModel::AttributeNode); \
00104 _tmpn_->yesAttrCnt = 0; \
00105 _tmpn_->noAttrCnt = 0; \
00106 \
00107 \
00108 _tmpn_->hasAttr = false; \
00109 _node_ = _tmpn_; \
00110 } \
00111 else \
00112 { \
00113 ElementModel::Node *_tmpn_; \
00114 NEW(_tmpn_, ElementModel::Node); \
00115 _node_ = (ElementModel::AttributeNode *)_tmpn_; \
00116 } \
00117 _node_->type = _type_; \
00118 _node_->modeler = _modeler_; \
00119 _node_->id = nodeCounter; \
00120 _node_->mpt = 0; \
00121 nodeList.append(_node_); \
00122 nodeCounter++; \
00123 _node_->successors.setAutoDelete(true); \
00124 \
00125 if (_pred_) \
00126 { \
00127 CREATE_GRAPH_TRANSITION(_pred_, _node_); \
00128 } \
00129 }
00130
00131
00132
00140 #define GET_MOST_PROBABLE_TRANSITION(_node_, _mpt_, _definite_) \
00141 { \
00142 if (_node_->successors.isEmpty()) \
00143 { \
00144 _mpt_ = 0; \
00145 _definite_ = true; \
00146 } \
00147 else \
00148 { \
00149 _mpt_ = _node_->successors.first(); \
00150 _node_->successors.next(); \
00151 _definite_ = true; \
00152 \
00153 for (ElementModel::Transition *_tr_ = _node_->successors.current(); \
00154 _tr_; _tr_ = _node_->successors.next()) \
00155 { \
00156 if (_tr_->frequency == _mpt_->frequency) \
00157 { \
00158 _mpt_ = 0; \
00159 _definite_ = false; \
00160 } \
00161 else \
00162 if (_tr_->frequency > _mpt_->frequency) \
00163 { \
00164 _mpt_ = _tr_; \
00165 _definite_ = true; \
00166 } \
00167 } \
00168 } \
00169 }
00170
00171
00172
00173
00182 #define FIND_TRANSITION(_tr_, _node_, _type_, _modeler_) \
00183 { \
00184 _tr_ = 0; \
00185 for (ElementModel::Transition *_t_ = _node_->successors.first(); \
00186 _t_; _t_ = _node_->successors.next()) \
00187 { \
00188 if (_t_->node->type == _type_) \
00189 { \
00190 if (_t_->node->type != ElementModel::ElementNode) \
00191 { \
00192 _tr_ = _t_; \
00193 INC_TRANSITION_FREQUENCY(_node_, _t_); \
00194 break; \
00195 } \
00196 else \
00197 if (_t_->node->modeler == _modeler_) \
00198 { \
00199 _tr_ = _t_; \
00200 INC_TRANSITION_FREQUENCY(_node_, _tr_); \
00201 break; \
00202 } \
00203 } \
00204 } \
00205 }
00206
00207
00211
00212
00216 ElementModeler::ElementModeler(XmlChar *name)
00217 {
00218 elementName = name;
00219 nodeCounter = 0;
00220 refCount = 1;
00221 averageEntropy = 0;
00222
00223 CREATE_GRAPH_NODE(0, startNode, ElementModel::StartNode, this);
00224 CREATE_GRAPH_NODE(0, endNode, ElementModel::EndNode, this);
00225 resetCurrentNode();
00226
00227
00228
00229 nodeList.setAutoDelete(true);
00230 }
00231
00232
00233
00237 ElementModeler::~ElementModeler(void)
00238 {
00239 }
00240
00241
00245 void ElementModeler::computeAverageEntropy(void)
00246 {
00247 averageEntropy = ElementModelEntropyCalculator::calculate(this);
00248 }
00249
00250
00255 double ElementModeler::getAverageEntropy(void)
00256 {
00257 return averageEntropy;
00258 }
00259
00260
00264 void ElementModeler::resetCurrentNode(void)
00265 {
00266 currentNode = startNode;
00267 }
00268
00269
00270
00274 void ElementModeler::popCurrentNode(void)
00275 {
00276 if (!nodeStack.isEmpty())
00277 {
00278 currentNode = nodeStack.pop();
00279 DBG("POPPED: " << currentNode->type);
00280 }
00281 else
00282 {
00283 DBG("NODE STACK IS EMPTY");
00284 }
00285 }
00286
00287
00293 ElementModel::Node *ElementModeler::getCurrentNode(void)
00294 {
00295 return currentNode;
00296 }
00297
00298
00299
00312 ElementModel::TransitionState ElementModeler::moveToDesiredNode(ElementModel::NodeType desiredNodeType, size_t *edgeId, size_t *elementPushes, ElementModeler *elementModeler = 0)
00313 {
00314 ElementModel::Transition *transition;
00315 ElementModel::Transition *mpt = currentNode->mpt;
00316
00317 FIND_TRANSITION(transition, currentNode, desiredNodeType, elementModeler);
00318
00319 if (!transition)
00320 {
00321
00322 ElementModel::Node *newNode;
00323
00324
00325 if (desiredNodeType == ElementModel::ElementNode)
00326 {
00327
00328
00329 CREATE_GRAPH_NODE(currentNode, newNode, desiredNodeType, elementModeler);
00330
00331
00332
00333 nodeStack.push(newNode);
00334 }
00335 else
00336 if (desiredNodeType == ElementModel::EndNode)
00337 {
00338
00339
00340 newNode = endNode;
00341
00342 CREATE_GRAPH_TRANSITION(currentNode, newNode);
00343 }
00344 else
00345 {
00346
00347
00348 CREATE_GRAPH_NODE(currentNode, newNode, desiredNodeType, 0);
00349 }
00350
00351 currentNode = newNode;
00352
00353 return ElementModel::NewNodeCreated;
00354 }
00355 else
00356 {
00357
00358 if (transition == mpt)
00359 {
00360
00361
00362
00363
00364 ElementModel::TransitionState result = ElementModel::Definite;
00365
00366 switch (desiredNodeType)
00367 {
00368 case ElementModel::EndNode:
00369
00370 currentNode = transition->node;
00371 *edgeId = transition->id;
00372 return result;
00373
00374 case ElementModel::CharactersNode:
00375
00376 currentNode = transition->node;
00377 *edgeId = transition->id;
00378 return result;
00379
00380 case ElementModel::ElementNode:
00381
00382
00383
00384 nodeStack.push(transition->node);
00385 *edgeId = transition->id;
00386 currentNode = transition->node;
00387 *elementPushes = (*elementPushes) + 1;
00388
00389 return result;
00390
00391 default:
00392 return ElementModel::Impossible;
00393 }
00394 }
00395 else
00396 {
00397
00398
00399
00400 ElementModel::TransitionState result = ElementModel::Indefinite;
00401
00402 switch (desiredNodeType)
00403 {
00404 case ElementModel::EndNode:
00405 currentNode = transition->node;
00406 *edgeId = transition->id;
00407 return result;
00408
00409 case ElementModel::CharactersNode:
00410 currentNode = transition->node;
00411 *edgeId = transition->id;
00412 return result;
00413
00414 case ElementModel::ElementNode:
00415
00416 *elementPushes = (*elementPushes) + 1;
00417 nodeStack.push(transition->node);
00418 currentNode = transition->node;
00419 *edgeId = transition->id;
00420 return result;
00421
00422 default:
00423 return ElementModel::Impossible;
00424 }
00425 }
00426 }
00427
00428
00429 }
00430
00431
00432
00442 ElementModel::NodeType ElementModeler::moveForward(ElementModel::Node **node, SAXEmitter *saxEmitter, void *userData)
00443 {
00444 if (!currentNode->mpt)
00445 {
00446
00447 return ElementModel::NoNode;
00448 }
00449 else
00450 {
00451 INC_TRANSITION_FREQUENCY(currentNode, currentNode->mpt);
00452
00453 currentNode = currentNode->mpt->node;
00454 *node = currentNode;
00455
00456
00457 switch(currentNode->type)
00458 {
00459 case ElementModel::EndNode:
00460 saxEmitter->endElement(userData, elementName);
00461 break;
00462
00463 case ElementModel::ElementNode:
00464
00465
00466 nodeStack.push(currentNode);
00467 break;
00468
00469 default: ;
00470 }
00471 }
00472
00473 return currentNode->type;
00474 }
00475
00476
00477
00488 ElementModel::NodeType ElementModeler::followEdge(size_t edgeId, ElementModel::Node **node, SAXEmitter *saxEmitter, void *userData)
00489 {
00490
00491 ElementModel::Transition *tr;
00492
00493 for (tr = currentNode->successors.first(); tr; tr = currentNode->successors.next())
00494 {
00495 if (tr->id == edgeId)
00496 break;
00497 }
00498
00499 if (!tr)
00500 {
00501
00502 return ElementModel::NoNode;
00503 }
00504 else
00505 {
00506 INC_TRANSITION_FREQUENCY(currentNode, tr);
00507
00508 currentNode = tr->node;
00509 *node = currentNode;
00510
00511
00512 switch(currentNode->type)
00513 {
00514 case ElementModel::EndNode:
00515 saxEmitter->endElement(userData, elementName);
00516 break;
00517
00518 case ElementModel::ElementNode:
00519
00520
00521 nodeStack.push(currentNode);
00522 break;
00523
00524 default: ;
00525 }
00526 }
00527
00528 return currentNode->type;
00529 }
00530
00531
00532
00538 ElementModel::NodeType ElementModeler::currentNodeType(void)
00539 {
00540 if (!currentNode)
00541 return ElementModel::NoNode;
00542 else
00543 return currentNode->type;
00544 }
00545
00546
00554 bool ElementModeler::setAttributes(bool attributes)
00555 {
00556 bool result = hasAttributes();
00557
00558 if (attributes)
00559 {
00560 startNode->yesAttrCnt++;
00561 if (startNode->yesAttrCnt >= startNode->noAttrCnt)
00562 startNode->hasAttr = true;
00563 }
00564 else
00565 {
00566 startNode->noAttrCnt++;
00567 if (startNode->noAttrCnt >= startNode->yesAttrCnt)
00568 startNode->hasAttr = false;
00569 }
00570
00571
00572 return result;
00573 }
00574
00575
00576
00582 bool ElementModeler::hasAttributes(void)
00583 {
00584 DBG("YES: " << startNode->yesAttrCnt);
00585 DBG("NO: " << startNode->noAttrCnt);
00586 return startNode->hasAttr;
00587 }
00588
00589
00590
00596 ElementModel::Node *ElementModeler::getStartNode(void)
00597 {
00598 return startNode;
00599 }
00600
00601
00602
00608 ElementModel::Node *ElementModeler::getEndNode(void)
00609 {
00610 return endNode;
00611 }
00612
00613
00614
00620 XmlChar *ElementModeler::getElementName(void)
00621 {
00622 return elementName;
00623 }
00624
00625
00629 void ElementModeler::increaseRefCount(void)
00630 {
00631 refCount++;
00632 }
00633
00634
00640 size_t ElementModeler::getRefCount(void)
00641 {
00642 return refCount;
00643 }
00644
00645
00646
00650 void ElementModeler::print(void)
00651 {
00652 OUTPUTENL(" Model for element \"" << elementName << "\"");
00653 OUTPUTENL(" Number of references:\t\t" << refCount);
00654 OUTPUTENL(" Has attributes (Yes/No):\t\t" << startNode->yesAttrCnt << "/" << startNode->noAttrCnt);
00655 OUTPUTENL(" Average entropy:\t\t\t" << averageEntropy);
00656 OUTPUTEENDLINE;
00657
00658 printNode(startNode);
00659 printNode(endNode);
00660
00661
00662 OUTPUTEENDLINE;
00663
00664
00665
00666
00667
00668 }
00669
00670
00671
00677 void ElementModeler::printNode(ElementModel::Node *node)
00678 {
00679 if (node == currentNode)
00680 {
00681 OUTPUTE("-> ");
00682 }
00683 else
00684 {
00685 OUTPUTE(" ");
00686 }
00687
00688
00689 OUTPUTE(" " << node->id << " (");
00690 switch (node->type)
00691 {
00692 case ElementModel::StartNode:
00693 OUTPUTE(STR_NODE_TYPE_START_NODE);
00694 break;
00695
00696 case ElementModel::EndNode:
00697 OUTPUTE(STR_NODE_TYPE_END_NODE);
00698 break;
00699
00700 case ElementModel::ElementNode:
00701 OUTPUTE(STR_NODE_TYPE_ELEMENT_NODE);
00702 break;
00703
00704 case ElementModel::CharactersNode:
00705 OUTPUTE(STR_NODE_TYPE_CHARACTERS_NODE);
00706 break;
00707
00708 default:;
00709 }
00710
00711 if (node->type == ElementModel::ElementNode)
00712 {
00713 OUTPUTE(": " << node->modeler->elementName);
00714 }
00715
00716 OUTPUTE("): ");
00717
00718 bool first = true;
00719 for (ElementModel::Transition *ss = node->successors.first(); ss; ss = node->successors.next())
00720 {
00721
00722 if (!first)
00723 {
00724 OUTPUTE(", ");
00725 }
00726 else
00727 first = false;
00728
00729 if (ss == node->mpt)
00730 {
00731 OUTPUTE("*");
00732 }
00733
00734 OUTPUTE(ss->node->id << "[" << ss->frequency << "]");
00735 }
00736
00737 OUTPUTEENDLINE;
00738
00739 for (ElementModel::Transition *ss = node->successors.first(); ss; ss = node->successors.next())
00740 {
00741 if (ss->node->type != ElementModel::EndNode)
00742 printNode(ss->node);
00743 }
00744 }
00745
00746
00747
00751
00752
00760 double ElementModelEntropyCalculator::calculate(ElementModeler *modeler)
00761 {
00762 ElementModel::Node *startNode;
00763 ElementModel::Node *endNode;
00764 ElementModel::Transition *tr;
00765 size_t nodeTransitionsTotal;
00766 double averageEntropy = 0;
00767 ElementModelTransitionInfo *tri;
00768
00769 Stack<ElementModelTransitionInfo> trStack;
00770 trStack.setAutoDelete(true);
00771
00772 startNode = modeler->getStartNode();
00773 endNode = modeler->getEndNode();
00774
00775 nodeTransitionsTotal = 0;
00776
00777
00778 for (tr = startNode->successors.first(); tr; tr = startNode->successors.next())
00779 {
00780 nodeTransitionsTotal += tr->frequency;
00781 }
00782
00783 for (tr = startNode->successors.first(); tr; tr = startNode->successors.next())
00784 {
00785 NEW(tri, ElementModelTransitionInfo);
00786 tri->transition = tr;
00787 tri->probability = (double)tr->frequency / nodeTransitionsTotal;
00788
00789 trStack.push(tri);
00790 }
00791
00792
00793
00794 while (!trStack.isEmpty())
00795 {
00796 tri = trStack.pop();
00797 ElementModel::Transition *t = tri->transition;
00798 double prob = tri->probability;
00799
00800
00801 if (t->node == endNode)
00802 {
00803
00804 averageEntropy += tri->probability*abs(log(tri->probability));
00805 }
00806 else
00807 {
00808 nodeTransitionsTotal = 0;
00809
00810 for (tr = t->node->successors.first(); tr; tr = t->node->successors.next())
00811 {
00812 nodeTransitionsTotal += tr->frequency;
00813 }
00814
00815 for (tr = t->node->successors.first(); tr; tr = t->node->successors.next())
00816 {
00817 NEW(tri, ElementModelTransitionInfo);
00818 tri->transition = tr;
00819 tri->probability = (prob * tr->frequency) / nodeTransitionsTotal;
00820 trStack.push(tri);
00821 }
00822 }
00823 }
00824
00825 trStack.clear();
00826
00827 return averageEntropy;
00828 }