Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

elementmodel.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002     elementmodel.cpp  -  Definition of ElementModeler class methods
00003                              -------------------
00004     begin                : November 19 2002
00005     copyright            : (C) 2003 by Vojtìch Toman
00006     email                : vtoman@lit.cz
00007 ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00018 
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030 
00031 
00032 
00033 #include "elementmodel.h"
00034 
00035 
00042 #define INC_TRANSITION_FREQUENCY(_node_, _tr_)          \
00043 {                                                       \
00044   _tr_->frequency++;                                    \
00045   ElementModel::Node *_tmpnode_ = _node_;               \
00046                                                         \
00047   if (_tmpnode_->mpt == _tr_)                           \
00048     {                                                   \
00049     }                                                   \
00050   else                                                  \
00051     if (!_tmpnode_->mpt)                                \
00052       {                                                 \
00053         _tmpnode_->mpt = _tr_;                          \
00054       }                                                 \
00055     else                                                \
00056       if (_tr_->frequency == _tmpnode_->mpt->frequency) \
00057         {                                               \
00058           _tmpnode_->mpt = _tr_;                        \
00059         }                                               \
00060       else                                              \
00061         if (_tr_->frequency > _tmpnode_->mpt->frequency)\
00062           {                                             \
00063             _tmpnode_->mpt = _tr_;                      \
00064           }                                             \
00065 }
00066 
00067 
00068 
00075 #define CREATE_GRAPH_TRANSITION(_from_, _to_)                           \
00076 {                                                                       \
00077   ElementModel::Transition *_tr_;                                       \
00078                                                                         \
00079   NEW(_tr_, ElementModel::Transition);                                  \
00080   _tr_->frequency = 0;                                                  \
00081   _tr_->id = ((ElementModel::Node *)_from_)->successors.count() + 1;    \
00082   INC_TRANSITION_FREQUENCY(_from_, _tr_);                               \
00083   _tr_->node = _to_;                                                    \
00084                                                                         \
00085   ((ElementModel::Node *)_from_)->successors.append(_tr_);              \
00086 }
00087 
00088 
00089 
00098 #define CREATE_GRAPH_NODE(_pred_, _node_, _type_, _modeler_)    \
00099 {                                                               \
00100   if (_type_ == ElementModel::StartNode)                        \
00101     {                                                           \
00102       ElementModel::AttributeNode *_tmpn_;                      \
00103       NEW(_tmpn_, ElementModel::AttributeNode);                 \
00104       _tmpn_->yesAttrCnt = 0;                                   \
00105       _tmpn_->noAttrCnt = 0;                                    \
00106       /* by default, elements are supposed not */               \
00107       /* to have attributes */                                  \
00108       _tmpn_->hasAttr = false;                                  \
00109       _node_ = _tmpn_;                                          \
00110     }                                                           \
00111   else                                                          \
00112     {                                                           \
00113       ElementModel::Node *_tmpn_;                               \
00114       NEW(_tmpn_, ElementModel::Node);                          \
00115       _node_ = (ElementModel::AttributeNode *)_tmpn_;           \
00116     }                                                           \
00117   _node_->type = _type_;                                        \
00118   _node_->modeler = _modeler_;                                  \
00119   _node_->id = nodeCounter;                                     \
00120   _node_->mpt = 0;                                              \
00121   nodeList.append(_node_);                                      \
00122   nodeCounter++;                                                \
00123   _node_->successors.setAutoDelete(true);                       \
00124                                                                 \
00125  if (_pred_)                                                    \
00126    {                                                            \
00127      CREATE_GRAPH_TRANSITION(_pred_, _node_);                   \
00128    }                                                            \
00129 }
00130 
00131 
00132 
00140 #define GET_MOST_PROBABLE_TRANSITION(_node_, _mpt_, _definite_)                 \
00141 {                                                                               \
00142   if (_node_->successors.isEmpty())                                             \
00143     {                                                                           \
00144       _mpt_ = 0;                                                                \
00145       _definite_ = true;                                                        \
00146     }                                                                           \
00147   else                                                                          \
00148     {                                                                           \
00149       _mpt_ = _node_->successors.first();                                       \
00150       _node_->successors.next();                                                \
00151       _definite_ = true;                                                        \
00152                                                                                 \
00153       for (ElementModel::Transition *_tr_ = _node_->successors.current();       \
00154            _tr_; _tr_ = _node_->successors.next())                              \
00155         {                                                                       \
00156           if (_tr_->frequency == _mpt_->frequency)                              \
00157             {                                                                   \
00158               _mpt_ = 0;                                                        \
00159               _definite_ = false;                                               \
00160             }                                                                   \
00161           else                                                                  \
00162             if (_tr_->frequency > _mpt_->frequency)                             \
00163               {                                                                 \
00164                 _mpt_ = _tr_;                                                   \
00165                 _definite_ = true;                                              \
00166               }                                                                 \
00167         }                                                                       \
00168     }                                                                           \
00169 }
00170 
00171 
00172 
00173 
00182 #define FIND_TRANSITION(_tr_, _node_, _type_, _modeler_)                \
00183 {                                                                       \
00184   _tr_ = 0;                                                             \
00185   for (ElementModel::Transition *_t_ = _node_->successors.first();      \
00186        _t_; _t_ = _node_->successors.next())                            \
00187     {                                                                   \
00188       if (_t_->node->type == _type_)                                    \
00189         {                                                               \
00190           if (_t_->node->type != ElementModel::ElementNode)             \
00191             {                                                           \
00192               _tr_ = _t_;                                               \
00193               INC_TRANSITION_FREQUENCY(_node_, _t_);                    \
00194               break;                                                    \
00195             }                                                           \
00196           else                                                          \
00197             if (_t_->node->modeler == _modeler_)                        \
00198               {                                                         \
00199                 _tr_ = _t_;                                             \
00200                 INC_TRANSITION_FREQUENCY(_node_, _tr_);                 \
00201                 break;                                                  \
00202               }                                                         \
00203         }                                                               \
00204     }                                                                   \
00205 }
00206 
00207 
00211 
00212 
00216 ElementModeler::ElementModeler(XmlChar *name)
00217 {
00218   elementName = name;
00219   nodeCounter = 0;
00220   refCount = 1;
00221   averageEntropy = 0;
00222 
00223   CREATE_GRAPH_NODE(0, startNode, ElementModel::StartNode, this);
00224   CREATE_GRAPH_NODE(0, endNode, ElementModel::EndNode, this);
00225   resetCurrentNode();
00226 
00227   //this causes the nodes and the transitions to be deleted
00228   //in the destructor
00229   nodeList.setAutoDelete(true);
00230 }
00231 
00232 
00233 
00237 ElementModeler::~ElementModeler(void)
00238 {
00239 }
00240 
00241 
00245 void ElementModeler::computeAverageEntropy(void)
00246 {
00247   averageEntropy = ElementModelEntropyCalculator::calculate(this);
00248 }
00249 
00250 
00255 double ElementModeler::getAverageEntropy(void)
00256 {
00257   return averageEntropy;
00258 }
00259 
00260 
00264 void ElementModeler::resetCurrentNode(void)
00265 {
00266   currentNode = startNode;
00267 }
00268 
00269 
00270 
00274 void ElementModeler::popCurrentNode(void)
00275 {
00276   if (!nodeStack.isEmpty())
00277     {
00278       currentNode = nodeStack.pop();
00279       DBG("POPPED: " << currentNode->type);
00280     }
00281   else
00282     {
00283       DBG("NODE STACK IS EMPTY");
00284     }
00285 }
00286 
00287 
00293 ElementModel::Node *ElementModeler::getCurrentNode(void)
00294 {
00295   return currentNode;
00296 }
00297 
00298 
00299 
00312 ElementModel::TransitionState ElementModeler::moveToDesiredNode(ElementModel::NodeType desiredNodeType, size_t *edgeId, size_t *elementPushes, ElementModeler *elementModeler = 0)
00313 {
00314   ElementModel::Transition *transition; //the most probable transition from the current node
00315   ElementModel::Transition *mpt = currentNode->mpt;
00316 
00317   FIND_TRANSITION(transition, currentNode, desiredNodeType, elementModeler);
00318 
00319   if (!transition)
00320     {
00321       //there is no transition of desired type --> we need to create it
00322       ElementModel::Node *newNode;
00323 
00324       //create the desired successor
00325       if (desiredNodeType == ElementModel::ElementNode)
00326         {
00327           //element node successor
00328           //create new element node successor
00329           CREATE_GRAPH_NODE(currentNode, newNode, desiredNodeType, elementModeler);
00330 
00331 
00332           //remember the node that leads to new element model
00333           nodeStack.push(newNode);
00334         }
00335       else
00336         if (desiredNodeType == ElementModel::EndNode)
00337           {
00338             //connect with the end node
00339 
00340             newNode = endNode;
00341 
00342             CREATE_GRAPH_TRANSITION(currentNode, newNode);
00343           }
00344         else
00345           {
00346             //create other successor
00347             //      DBG("ordinary node created");
00348             CREATE_GRAPH_NODE(currentNode, newNode, desiredNodeType, 0);
00349           }
00350 
00351       currentNode = newNode;
00352 
00353       return ElementModel::NewNodeCreated;
00354     }
00355   else
00356     {
00357       //there is  a transition of desired type
00358       if (transition == mpt)
00359         {
00360           //DBG("used mpt definite");
00361 
00362           //the found transition is the most probable transition
00363 
00364           ElementModel::TransitionState result = ElementModel::Definite;
00365 
00366           switch (desiredNodeType)
00367             {
00368             case ElementModel::EndNode:
00369               //              DBG("definite end node");
00370               currentNode = transition->node;
00371               *edgeId = transition->id;
00372               return result;
00373 
00374             case ElementModel::CharactersNode:
00375               //              DBG("definite character node");
00376               currentNode = transition->node;
00377               *edgeId = transition->id;
00378               return result;
00379                   
00380             case ElementModel::ElementNode:
00381               //              DBG("definite element node");
00382 
00383               //remember the node that leads to new element model
00384               nodeStack.push(transition->node);
00385               *edgeId = transition->id;
00386               currentNode = transition->node;
00387               *elementPushes = (*elementPushes) + 1;
00388 
00389               return result;
00390                   
00391             default:
00392               return ElementModel::Impossible;
00393             }
00394         }
00395       else
00396         {
00397           //the found transition either isn't the mpt
00398           //DBG("not mpt");
00399 
00400           ElementModel::TransitionState result = ElementModel::Indefinite;
00401 
00402           switch (desiredNodeType)
00403             {
00404             case ElementModel::EndNode:
00405               currentNode = transition->node;
00406               *edgeId = transition->id;
00407               return result;
00408 
00409             case ElementModel::CharactersNode:
00410               currentNode = transition->node;
00411               *edgeId = transition->id;
00412               return result;
00413               
00414             case ElementModel::ElementNode:
00415               //remember the node that leads to new element model
00416               *elementPushes = (*elementPushes) + 1;
00417               nodeStack.push(transition->node);
00418               currentNode = transition->node;
00419               *edgeId = transition->id;
00420               return result;
00421               
00422             default:
00423               return ElementModel::Impossible;
00424             }
00425         }
00426     }
00427 
00428   //return ElementModel::Impossible;
00429 }
00430 
00431 
00432 
00442 ElementModel::NodeType ElementModeler::moveForward(ElementModel::Node **node, SAXEmitter *saxEmitter, void *userData)
00443 {
00444   if (!currentNode->mpt)
00445     {
00446       //there is no transition of desired type --> we need to create it
00447       return ElementModel::NoNode;
00448     }
00449   else
00450     {
00451       INC_TRANSITION_FREQUENCY(currentNode, currentNode->mpt);
00452 
00453       currentNode = currentNode->mpt->node;
00454       *node = currentNode;
00455 
00456       //when moving, emit SAX events reflecting the structure of the graph
00457       switch(currentNode->type)
00458         {
00459         case ElementModel::EndNode:
00460           saxEmitter->endElement(userData, elementName);
00461           break;
00462 
00463         case ElementModel::ElementNode:
00464           //remember the node that leads to new element model
00465           //      *elementPushes = (*elementPushes) + 1;
00466           nodeStack.push(currentNode);
00467           break;
00468 
00469         default: ;
00470         }
00471     }
00472 
00473   return currentNode->type;
00474 }
00475 
00476 
00477 
00488 ElementModel::NodeType ElementModeler::followEdge(size_t edgeId, ElementModel::Node **node, SAXEmitter *saxEmitter, void *userData)
00489 {
00490   //important: first edge has id 1!
00491   ElementModel::Transition *tr;
00492 
00493   for (tr = currentNode->successors.first(); tr; tr = currentNode->successors.next())
00494     {
00495       if (tr->id == edgeId)
00496         break;
00497     }
00498 
00499   if (!tr)
00500     {
00501       //there is no transition with desired id --> report it (should never happen)
00502       return ElementModel::NoNode;
00503     }
00504   else
00505     {
00506       INC_TRANSITION_FREQUENCY(currentNode, tr);
00507 
00508       currentNode = tr->node;
00509       *node = currentNode;
00510 
00511       //when moving, emit SAX events reflecting the structure of the graph
00512       switch(currentNode->type)
00513         {
00514         case ElementModel::EndNode:
00515           saxEmitter->endElement(userData, elementName);
00516           break;
00517 
00518         case ElementModel::ElementNode:
00519           //remember the node that leads to new element model
00520           //      *elementPushes = (*elementPushes) + 1;
00521           nodeStack.push(currentNode);
00522           break;
00523 
00524         default: ;
00525         }
00526     }
00527 
00528   return currentNode->type;
00529 }
00530 
00531 
00532 
00538 ElementModel::NodeType ElementModeler::currentNodeType(void)
00539 {
00540   if (!currentNode)
00541     return ElementModel::NoNode;
00542   else
00543     return currentNode->type;
00544 }
00545 
00546 
00554 bool ElementModeler::setAttributes(bool attributes)
00555 {
00556   bool result = hasAttributes();
00557 
00558   if (attributes)
00559     {
00560       startNode->yesAttrCnt++;
00561       if (startNode->yesAttrCnt >= startNode->noAttrCnt)
00562         startNode->hasAttr = true;
00563     }
00564   else
00565     {
00566       startNode->noAttrCnt++;
00567       if (startNode->noAttrCnt >= startNode->yesAttrCnt)
00568         startNode->hasAttr = false;
00569     }
00570 
00571   //return the supposed atribute state before the update
00572   return result;
00573 }
00574 
00575 
00576 
00582 bool ElementModeler::hasAttributes(void)
00583 {
00584   DBG("YES: " << startNode->yesAttrCnt);
00585   DBG("NO: " << startNode->noAttrCnt);
00586   return startNode->hasAttr;
00587 }
00588 
00589 
00590 
00596 ElementModel::Node *ElementModeler::getStartNode(void)
00597 {
00598   return startNode;
00599 }
00600 
00601 
00602 
00608 ElementModel::Node *ElementModeler::getEndNode(void)
00609 {
00610   return endNode;
00611 }
00612 
00613 
00614 
00620 XmlChar *ElementModeler::getElementName(void)
00621 {
00622   return elementName;
00623 }
00624 
00625 
00629 void ElementModeler::increaseRefCount(void)
00630 {
00631   refCount++;
00632 }
00633 
00634 
00640 size_t ElementModeler::getRefCount(void)
00641 {
00642   return refCount;
00643 }
00644 
00645 
00646 
00650 void ElementModeler::print(void)
00651 {
00652   OUTPUTENL("  Model for element \"" << elementName << "\"");
00653   OUTPUTENL("    Number of references:\t\t" << refCount);
00654   OUTPUTENL("    Has attributes (Yes/No):\t\t" << startNode->yesAttrCnt << "/" << startNode->noAttrCnt);
00655   OUTPUTENL("    Average entropy:\t\t\t" << averageEntropy);
00656   OUTPUTEENDLINE;
00657 
00658   printNode(startNode);
00659   printNode(endNode);
00660 
00661   
00662   OUTPUTEENDLINE;
00663 
00664   //   if (!nodeStack.isEmpty())
00665   //     {
00666   //       OUTPUTENL("NODE STACK TOP: " << nodeStack.top()->modeler);
00667   //     }
00668 }
00669 
00670 
00671 
00677 void ElementModeler::printNode(ElementModel::Node *node)
00678 {
00679   if (node == currentNode)
00680     {
00681       OUTPUTE("-> ");
00682     }
00683   else
00684     {
00685       OUTPUTE("   ");
00686     }
00687 
00688 
00689   OUTPUTE("      " << node->id << " (");
00690   switch (node->type)
00691     {
00692     case ElementModel::StartNode:
00693       OUTPUTE(STR_NODE_TYPE_START_NODE);
00694       break;
00695        
00696     case ElementModel::EndNode:
00697       OUTPUTE(STR_NODE_TYPE_END_NODE);
00698       break;
00699        
00700     case ElementModel::ElementNode:
00701       OUTPUTE(STR_NODE_TYPE_ELEMENT_NODE);
00702       break;
00703        
00704     case ElementModel::CharactersNode:
00705       OUTPUTE(STR_NODE_TYPE_CHARACTERS_NODE);
00706       break;
00707        
00708     default:;
00709     }
00710 
00711   if (node->type == ElementModel::ElementNode)
00712     {
00713       OUTPUTE(": " << node->modeler->elementName);
00714     }
00715 
00716   OUTPUTE("): ");
00717 
00718   bool first = true;
00719   for (ElementModel::Transition *ss = node->successors.first(); ss; ss = node->successors.next())
00720     {
00721 
00722       if (!first)
00723         {
00724           OUTPUTE(", ");
00725         }
00726       else
00727         first = false;
00728 
00729       if (ss == node->mpt)
00730         {
00731           OUTPUTE("*");
00732         }
00733 
00734       OUTPUTE(ss->node->id << "[" << ss->frequency << "]");
00735     }
00736 
00737   OUTPUTEENDLINE;
00738 
00739   for (ElementModel::Transition *ss = node->successors.first(); ss; ss = node->successors.next())
00740     {
00741       if (ss->node->type != ElementModel::EndNode)
00742         printNode(ss->node);
00743     }
00744 }
00745 
00746 
00747 
00751 
00752 
00760 double ElementModelEntropyCalculator::calculate(ElementModeler *modeler)
00761 {
00762   ElementModel::Node *startNode;
00763   ElementModel::Node *endNode;
00764   ElementModel::Transition *tr;
00765   size_t nodeTransitionsTotal;
00766   double averageEntropy = 0;
00767   ElementModelTransitionInfo *tri;
00768 
00769   Stack<ElementModelTransitionInfo> trStack;
00770   trStack.setAutoDelete(true);
00771 
00772   startNode = modeler->getStartNode();
00773   endNode = modeler->getEndNode();
00774 
00775   nodeTransitionsTotal = 0;
00776 
00777 
00778   for (tr = startNode->successors.first(); tr; tr = startNode->successors.next())
00779     {
00780       nodeTransitionsTotal += tr->frequency;
00781     }
00782 
00783   for (tr = startNode->successors.first(); tr; tr = startNode->successors.next())
00784     {
00785       NEW(tri, ElementModelTransitionInfo);
00786       tri->transition = tr;
00787       tri->probability = (double)tr->frequency / nodeTransitionsTotal;
00788 
00789       trStack.push(tri);
00790     }
00791 
00792 
00793 
00794   while (!trStack.isEmpty())
00795     {
00796       tri = trStack.pop();
00797       ElementModel::Transition *t = tri->transition;
00798       double prob = tri->probability;
00799 
00800 
00801       if (t->node == endNode)
00802         {
00803           //reached the end node
00804           averageEntropy += tri->probability*abs(log(tri->probability));
00805         }
00806       else
00807         {
00808           nodeTransitionsTotal = 0;
00809 
00810           for (tr = t->node->successors.first(); tr; tr = t->node->successors.next())
00811             {
00812               nodeTransitionsTotal += tr->frequency;
00813             }
00814           
00815           for (tr = t->node->successors.first(); tr; tr = t->node->successors.next())
00816             {
00817               NEW(tri, ElementModelTransitionInfo);
00818               tri->transition = tr;
00819               tri->probability = (prob * tr->frequency) / nodeTransitionsTotal;
00820               trStack.push(tri);
00821             }
00822         }
00823     }
00824 
00825   trStack.clear();
00826 
00827   return averageEntropy;
00828 }

Generated on Wed Feb 5 10:43:01 2003 for Exalt by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002