Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

xmlsimplemodel.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002     xmlsimplemodel.cpp  -  Definitions of class XmlSimpleModel methods
00003                              -------------------
00004     begin                : October 3 2002
00005     copyright            : (C) 2003 by Vojtěch Toman
00006     email                : vtoman@lit.cz
00007 ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00018 
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030 
00031 
00032 #include "xmlsimplemodel.h"
00033 
00034 
/**
 * Calls a member function of the attached SAX emitter, but only when an
 * emitter is actually set (saxEmitter is non-null). With no emitter the
 * call is silently skipped.
 *
 * The expansion is a self-contained brace block, the same shape as the other
 * statement macros in this file. The previous bare "if (...) call;" form
 * could swallow an "else" that followed the macro at the call site
 * (dangling else); the braces make that impossible.
 *
 * Requires a pointer named saxEmitter to be in scope at the point of use.
 *
 * @param _x_  Member call evaluated on saxEmitter,
 *             e.g. endElement(userData, name).
 */
#define SAFE_CALL_EMITTER(_x_)          \
{                                       \
  if (saxEmitter)                       \
    saxEmitter->_x_;                    \
}
00045 
00046 
/**
 * Empties the work buffer: writes a terminating 0 into its first item and
 * sets the recorded length to zero.
 *
 * Requires buffer (already allocated, since buffer[0] is written) and
 * bufferLength to be in scope at the point of use.
 */
00048 #define RESET_BUFFER                    \
00049 {                                       \
00050   buffer[0] = 0;                        \
00051   bufferLength = 0;                     \
00052 }
00053 
00054 
/**
 * Tries to switch the decoder to state _s_ and reports the outcome in
 * stateChanged.
 *
 * stateChanged is first cleared. The switch happens (and stateChanged
 * becomes true) only when the current state is not KnownElement and is
 * either None or already equal to _s_. In every other case state is left
 * untouched and stateChanged stays false.
 *
 * Requires state and a bool stateChanged to be in scope at the point of use.
 *
 * @param _s_  Target state (a StructuralSymbols value).
 */
00062 #define CHANGE_STATE(_s_)                                       \
00063 {                                                               \
00064   stateChanged = false;                                         \
00065   if (state != StructuralSymbols::KnownElement &&               \
00066       (state == StructuralSymbols::None || state == _s_))       \
00067   {                                                             \
00068     state = _s_;                                                \
00069     stateChanged = true;                                        \
00070   }                                                             \
00071 }
00072 
/**
 * Puts the decoder back into the neutral state (StructuralSymbols::None).
 * Expands to a plain assignment without a trailing semicolon, so the call
 * site has to supply it.
 */
00074 #define RESET_STATE                     state = StructuralSymbols::None
00075 
00076 
/**
 * Appends exactly _length_ items of _data_ to a grammar, each cast to
 * TerminalValue, optionally followed by an "end of block" symbol.
 *
 * The grammar pointer is first passed through CHECK_POINTER (defined
 * elsewhere; its behaviour on a null pointer is not visible here).
 * Unlike APPEND_ALL_DATA, the data pointer itself is not tested and zero
 * items inside the data do not stop the copy.
 *
 * @param _gr_        Pointer to the grammar that receives the data.
 * @param _data_      Indexable sequence of characters.
 * @param _length_    Number of items to append (compared against an int index).
 * @param _complete_  When true, StructuralSymbols::EndOfBlock is appended
 *                    after the data.
 */
00085 #define APPEND_ALL_DATA_LENGTH(_gr_, _data_, _length_, _complete_)      \
00086 {                                                                       \
00087   CHECK_POINTER(_gr_);                                                  \
00088                                                                         \
00089   for (int _i_ = 0; _i_ < _length_; _i_++)                              \
00090     _gr_->append((TerminalValue)_data_[_i_]);                           \
00091                                                                         \
00092   if (_complete_)                                                       \
00093     _gr_->append(StructuralSymbols::EndOfBlock);                        \
00094 }
00095 
00096 
/**
 * Appends a zero-terminated sequence to a grammar, item by item (each cast
 * to TerminalValue, the terminating 0 excluded), optionally followed by an
 * "end of block" symbol.
 *
 * The grammar pointer is first passed through CHECK_POINTER (defined
 * elsewhere; its behaviour on a null pointer is not visible here).
 *
 * Caution: when _data_ is null the macro executes "return false" in the
 * ENCLOSING function. It can therefore only be used inside functions whose
 * return type accepts false, and any cleanup placed after the macro in that
 * function is skipped on this path.
 *
 * @param _gr_        Pointer to the grammar that receives the data.
 * @param _data_      Zero-terminated sequence of characters, or null.
 * @param _complete_  When true, StructuralSymbols::EndOfBlock is appended
 *                    after the data.
 */
00104 #define APPEND_ALL_DATA(_gr_, _data_, _complete_)       \
00105 {                                                       \
00106   CHECK_POINTER(_gr_);                                  \
00107                                                         \
00108   if (!_data_)                                          \
00109     return false;                                       \
00110                                                         \
00111   for (int _i_ = 0; _data_[_i_]; _i_++)                 \
00112     _gr_->append((TerminalValue)_data_[_i_]);           \
00113                                                         \
00114   if (_complete_)                                       \
00115     _gr_->append(StructuralSymbols::EndOfBlock);        \
00116 }
00117 
00118 
/**
 * Appends the first _nrItems_ items of _buffer_ to a grammar.
 *
 * In contrast to the APPEND_ALL_DATA* macros this one performs no pointer
 * check, applies no cast to the items and never appends an "end of block"
 * symbol.
 *
 * @param _gr_       Pointer to the grammar that receives the data.
 * @param _buffer_   Indexable buffer holding the items.
 * @param _nrItems_  Number of items to append.
 */
00126 #define APPEND_BUFFER_DATA(_gr_, _buffer_, _nrItems_)   \
00127 {                                                       \
00128   for (size_t _i_ = 0; _i_ < _nrItems_; _i_++)          \
00129     {                                                   \
00130       _gr_->append(_buffer_[_i_]);                      \
00131     }                                                   \
00132 }
00133 
00134 
00135 
/**
 * Finishes the decoding of an element (or attribute) name and, once all the
 * information is available, emits the SAX startElement event.
 *
 * Three cases are distinguished:
 *  - elementHasAttributes is false: the element id is pushed onto
 *    elementStack and startElement is emitted with no attribute list
 *    (note that this branch passes ec->name, not _elementName_).
 *  - elementHasAttributes and attributeListComplete are both true: every
 *    item waiting in dataQueue is moved into a null-terminated array, the
 *    element id is pushed, startElement is emitted with that array, and
 *    afterwards the entries at odd positions (the attribute values) and the
 *    array itself are released.
 *  - otherwise: the just-decoded name (ec->name) is queued in dataQueue as a
 *    Characters item until the rest of the element has been decoded.
 * In the first two cases both flags are cleared. In all cases the decoder
 * state is reset at the end.
 *
 * Requires these names to be in scope at the point of use:
 * elementHasAttributes, attributeListComplete, elementStack, elementId, ec,
 * dataQueue, dataQueueItem, saxEmitter, userData and state.
 *
 * NOTE(review): the release loop advances by two and reads attrs[attrPos+1],
 * so it assumes the queue held an even number of items (name/value pairs);
 * confirm that an odd count cannot occur.
 * NOTE(review): dataQueue has auto-delete switched on; whether dequeue()
 * hands ownership of the item (and of item->data) to the caller is not
 * visible here - verify against the queue implementation.
 *
 * @param _elementName_  Name passed to startElement when attributes exist.
 */
00143 #define FINISH_ELEMENT_START(_elementName_)                                     \
00144 {                                                                               \
00145   if (!elementHasAttributes)                                                    \
00146     {                                                                           \
00147       /* element hasn't attributes --> we can push it to the stack */           \
00148       /* and emit startElement with no attributes */                            \
00149       /* push element to the stack */                                           \
00150       elementStack->push(new unsigned long(elementId));                         \
00151                                                                                 \
00152       SAFE_CALL_EMITTER(startElement(userData, ec->name, 0));                   \
00153                                                                                 \
00154       /* Element information is now complete */                                 \
00155       elementHasAttributes = false;                                             \
00156       attributeListComplete = false;                                            \
00157     }                                                                           \
00158   else                                                                          \
00159     if (attributeListComplete)                                                  \
00160       {                                                                         \
00161         const XmlChar **attrs;                                                  \
00162         size_t attrPos = 0;                                                     \
00163                                                                                 \
00164         NEW(attrs, const XmlChar *[dataQueue->count()+1]);                      \
00165         attrs[dataQueue->count()] = 0;                                          \
00166         /* attributes and the element name decoded */                           \
00167         while ((dataQueueItem = dataQueue->dequeue()))                          \
00168           {                                                                     \
00169             /* an attribute name or an attribute value */                       \
00170             attrs[attrPos] = dataQueueItem->data;                               \
00171             attrPos++;                                                          \
00172           }                                                                     \
00173                                                                                 \
00174         elementStack->push(new unsigned long(elementId));                       \
00175         SAFE_CALL_EMITTER(startElement(userData, _elementName_, attrs));        \
00176                                                                                 \
00177         /* delete attribute values */                                           \
00178         /* attribute names can't be deleted, since they */                      \
00179         /* are in the element list!                     */                      \
00180         for (attrPos = 0; attrs[attrPos]; attrPos += 2)                         \
00181           {                                                                     \
00182             DELETE(attrs[attrPos+1]);                                           \
00183           }                                                                     \
00184                                                                                 \
00185         DELETE_ARRAY(attrs);                                                    \
00186                                                                                 \
00187         elementHasAttributes = false;                                           \
00188         attributeListComplete = false;                                          \
00189       }                                                                         \
00190     else                                                                        \
00191       {                                                                         \
00192         /* some attributes or the element still need to be decoded */           \
00193         /* enqueue the decoded name                                */           \
00194         NEW(dataQueueItem, DataQueueItem);                                      \
00195         dataQueueItem->type = Characters;                                       \
00196         dataQueueItem->data = ec->name;                                         \
00197         dataQueue->enqueue(dataQueueItem);                                      \
00198       }                                                                         \
00199   RESET_STATE;                                                                  \
00200 }
00201 
00202 
00203 
00207 XmlSimpleModel::XmlSimpleModel(void)
00208   : XmlModelBase()
00209 {
00210   grammar = 0;
00211 
00212   state = StructuralSymbols::None;
00213   
00214   buffer = 0;
00215   bufferLength = 0;
00216 
00217   fibBits = 0;
00218   fibCode = 0;
00219   fibLastWasOne = false;
00220   
00221   NEW(dataQueue, DataQueue);
00222   dataQueue->setAutoDelete(true);
00223 
00224   saxEmitter = 0;
00225   userData = 0;
00226 
00227   //    dataInEncoding = UTF_8;
00228   //    dataOutEncoding = UTF_8;
00229 
00230   inCDATA = false;
00231   inDoctype = false;
00232   elementHasAttributes = false;
00233   attributeListComplete = false;
00234 
00235   //stack of element ids
00236   NEW(elementStack, Stack<unsigned long>);
00237 
00238   elementStack->setAutoDelete(true);
00239 
00240   NEW(elementIds, ElementIds);
00241   elementIds->setAutoDelete(true);
00242 
00243   NEW(elements, SimpleElementTable);
00244   elements->setAutoDelete(true);
00245 
00246   //list for containing all element nnames
00247   //used just for freeing all the data
00248   NEW(elementNamesList, ElementNamesList);
00249   elementNamesList->setAutoDelete(true);
00250 
00251   elementCounter = 1;
00252 }
00253 
00254 
00258 XmlSimpleModel::~XmlSimpleModel(void)
00259 {
00260   DELETE(elements);
00261   DELETE(elementStack);
00262 
00263 
00264   //display the number of elements and attributes if in verbose mode
00265   if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00266     {
00267       OUTPUTENL("  Number of elements/attributes:\t" << elementNamesList->count());
00268       OUTPUTEENDLINE;
00269     }
00270 
00271 
00272 
00273   DELETE(elementNamesList);
00274 
00275   DELETE(dataQueue);
00276 }
00277 
00278 
00285 void XmlSimpleModel::setSAXEmitter(SAXEmitter *emitter, void *data = 0)
00286 {
00287   saxEmitter = emitter;
00288   userData = data;
00289 }
00290 
00291 
/**
 * Encodes one XML event into the grammar.
 *
 * The event type selects a branch that appends the matching structural
 * symbol(s) and the event's data to the grammar. New element and attribute
 * names are registered (name list, name --> id table, id --> context table)
 * and written out in full; names seen before are written as a Fibonacci
 * encoded id instead.
 *
 * The event is deleted before the function returns through its regular
 * exits (handled event or unknown event type).
 *
 * NOTE(review): APPEND_ALL_DATA contains a hidden "return false" for null
 * data. On that path the function returns without deleting the event.
 * NOTE(review): grammar is dereferenced directly in most branches; it is
 * assumed to have been set before the first event arrives.
 *
 * @param event  Event to encode; ownership passes to this function.
 * @return true when the event was encoded; false for an unknown event type
 *         or when a string handed to APPEND_ALL_DATA was null.
 */
00297 bool XmlSimpleModel::manageEvent(XmlModelEvent *event)
00298 {
00299   XmlStartElementEvent *startElementEvent;
00300   XmlEndElementEvent *endElementEvent;
00301   XmlCharactersEvent *charactersEvent;
00302   XmlCommentEvent *commentEvent;
00303   XmlPIEvent *piEvent;
00304   XmlDeclEvent *xmlDeclEvent;
00305   XmlStartDoctypeEvent *xmlStartDoctypeEvent;
00306 
00307   XmlEntityDeclEvent *xmlEntityDeclEvent;
00308   XmlNotationDeclEvent *xmlNotationDeclEvent;
00309   XmlDefaultEvent *defaultEvent;
00310 
00311   XmlChar *elName, *attrName;
00312   unsigned long *elId, *attrId;
00313   unsigned long elementId, attributeId;
00314 
00315   SimpleElementContext *ec;
00316   size_t fibItems;
00317   XmlChar fibBuf[30];
00318   Encodings::MIB mib;
00319 
00320   //examine the event type and perform appropriate actions
00321   switch (event->type)
00322     {
00323       /**** XML declaration ****/
00324     case XmlModelEvent::XmlDecl:
00325       xmlDeclEvent = (XmlDeclEvent *)event;
00326 
00327       if (!textCodec)
00328         //use default text codec
00329         createDefaultTextCodec();
00330 
      //NOTE(review): encoding is handed to getMIB() before the null test
      //further below; presumably getMIB() copes with a null name - confirm
00331       mib = textCodec->getMIB(xmlDeclEvent->encoding);
00332           
      //size the grammar alphabet according to the declared encoding
00333       grammar->setAlphabetBaseSize(textCodec->suggestAlphabetBaseSize(mib));
00334 
00335       grammar->append(StructuralSymbols::XmlDecl);
00336 
      //the version string (a null version makes the macro return false)
00337       APPEND_ALL_DATA(grammar, xmlDeclEvent->version, true);
00338 
      //the encoding name, or a lone "end of block" when none was given
00339       if (xmlDeclEvent->encoding)
00340         {
00341           APPEND_ALL_DATA(grammar, xmlDeclEvent->encoding, true);
00342         }
00343       else
00344         grammar->append(StructuralSymbols::EndOfBlock);
00345 
      //standalone: -1 means "not specified", otherwise yes/no
00346       if (xmlDeclEvent->standalone != -1)
00347         {
00348           if (xmlDeclEvent->standalone)
00349             grammar->append(StructuralSymbols::StandaloneYes);
00350           else
00351             grammar->append(StructuralSymbols::StandaloneNo);
00352         }
00353       else
00354         grammar->append(StructuralSymbols::StandaloneNotSpecified);
00355 
00356       break;
00357 
00358 
00359 
00360       /**** Doctype ****/
00361     case XmlModelEvent::StartDoctype:
00362       xmlStartDoctypeEvent = (XmlStartDoctypeEvent *)event;
00363 
00364       grammar->append(StructuralSymbols::Doctype);
00365 
      //Each optional string below is written in one of three forms:
      //  null pointer  --> "end of block" only
      //  empty string  --> "empty string" + "end of block"
      //  anything else --> the characters + "end of block"
      //(the doctype name has no separate empty-string form)
00366       if (xmlStartDoctypeEvent->doctypeName)
00367         {
00368           APPEND_ALL_DATA(grammar, xmlStartDoctypeEvent->doctypeName, true);
00369         }
00370       else
00371         grammar->append(StructuralSymbols::EndOfBlock);
00372 
00373       if (xmlStartDoctypeEvent->publicId)
00374         {
00375           if (!xmlStartDoctypeEvent->publicId[0])
00376             {
00377               grammar->append(StructuralSymbols::EmptyString);
00378               grammar->append(StructuralSymbols::EndOfBlock);
00379             }
00380           else
00381             {
00382               APPEND_ALL_DATA(grammar, xmlStartDoctypeEvent->publicId, true);
00383             }
00384         }
00385       else
00386         grammar->append(StructuralSymbols::EndOfBlock);
00387           
00388       if (xmlStartDoctypeEvent->systemId)
00389         {
00390           if (!xmlStartDoctypeEvent->systemId[0])
00391             {
00392               grammar->append(StructuralSymbols::EmptyString);
00393               grammar->append(StructuralSymbols::EndOfBlock);
00394             }
00395           else
00396             {
00397               APPEND_ALL_DATA(grammar, xmlStartDoctypeEvent->systemId, true);
00398             }
00399         }
00400       else
00401         grammar->append(StructuralSymbols::EndOfBlock);
00402           
00403       if (xmlStartDoctypeEvent->hasInternalSubset)
00404         grammar->append(StructuralSymbols::HasInternalSubsetYes);
00405       else
00406         grammar->append(StructuralSymbols::HasInternalSubsetNo);
00407       //        }
00408       //        else
00409       //        inDoctype = false;
00410 
00411           
00412       break;
00413 
00414       /**** End Doctype ****/
00415     case XmlModelEvent::EndDoctype:
      //the end of the doctype is marked with the same symbol as its start
00416       grammar->append(StructuralSymbols::Doctype);
00417       break;
00418 
00419 
00420       /**** Entity declaration ****/
00421     case XmlModelEvent::EntityDecl:
00422       xmlEntityDeclEvent = (XmlEntityDeclEvent *)event;
00423 
00424       grammar->append(StructuralSymbols::EntityDecl);
00425 
00426       if (xmlEntityDeclEvent->entityName)
00427         {
00428           APPEND_ALL_DATA(grammar, xmlEntityDeclEvent->entityName, true);
00429         }
00430       else
00431         grammar->append(StructuralSymbols::EndOfBlock);
00432 
00433      
      //the entity value carries an explicit length, so it is appended with
      //the length-based macro rather than as a zero-terminated string
00434       if (!xmlEntityDeclEvent->valueLength && xmlEntityDeclEvent->value)
00435         {
00436           //zero length and value not NULL
00437           grammar->append(StructuralSymbols::EmptyString);
00438           grammar->append(StructuralSymbols::EndOfBlock);
00439         }
00440       else
00441         {
00442           if (xmlEntityDeclEvent->value)
00443             {
00444               //value of nonzero length
00445               APPEND_ALL_DATA_LENGTH(grammar, xmlEntityDeclEvent->value, xmlEntityDeclEvent->valueLength, true);
00446             }
00447           else
00448             //value is NULL
00449             grammar->append(StructuralSymbols::EndOfBlock);
00450         }
00451 
00452 
00453       if (xmlEntityDeclEvent->systemId)
00454         {
00455           if (!xmlEntityDeclEvent->systemId[0])
00456             {
00457               grammar->append(StructuralSymbols::EmptyString);
00458               grammar->append(StructuralSymbols::EndOfBlock);
00459             }
00460           else
00461             {
00462               APPEND_ALL_DATA(grammar, xmlEntityDeclEvent->systemId, true);
00463             }
00464         }
00465       else
00466         grammar->append(StructuralSymbols::EndOfBlock);
00467 
00468       if (xmlEntityDeclEvent->publicId)
00469         {
00470           if (!xmlEntityDeclEvent->publicId[0])
00471             {
00472               grammar->append(StructuralSymbols::EmptyString);
00473               grammar->append(StructuralSymbols::EndOfBlock);
00474             }
00475           else
00476             {
00477               APPEND_ALL_DATA(grammar, xmlEntityDeclEvent->publicId, true);
00478             }
00479         }
00480       else
00481         grammar->append(StructuralSymbols::EndOfBlock);
00482 
00483       if (xmlEntityDeclEvent->notationName)
00484         {
00485           if (!xmlEntityDeclEvent->notationName[0])
00486             {
00487               grammar->append(StructuralSymbols::EmptyString);
00488               grammar->append(StructuralSymbols::EndOfBlock);
00489             }
00490           else
00491             {
00492               APPEND_ALL_DATA(grammar, xmlEntityDeclEvent->notationName, true);
00493             }
00494         }
00495       else
00496         grammar->append(StructuralSymbols::EndOfBlock);
00497 
00498       if (xmlEntityDeclEvent->isParameterEntity)
00499         grammar->append(StructuralSymbols::IsParameterEntityYes);
00500       else
00501         grammar->append(StructuralSymbols::IsParameterEntityNo);
00502 
00503       break;
00504 
00505 
00506       /**** Notation declaration ****/
00507     case XmlModelEvent::NotationDecl:
00508       xmlNotationDeclEvent = (XmlNotationDeclEvent *)event;
00509 
00510       grammar->append(StructuralSymbols::NotationDecl);
00511 
00512       if (xmlNotationDeclEvent->notationName)
00513         {
00514           APPEND_ALL_DATA(grammar, xmlNotationDeclEvent->notationName, true);
00515         }
00516       else
00517         grammar->append(StructuralSymbols::EndOfBlock);
00518 
00519      
00520 
00521       if (xmlNotationDeclEvent->systemId)
00522         {
00523           if (!xmlNotationDeclEvent->systemId[0])
00524             {
00525               grammar->append(StructuralSymbols::EmptyString);
00526               grammar->append(StructuralSymbols::EndOfBlock);
00527             }
00528           else
00529             {
00530               APPEND_ALL_DATA(grammar, xmlNotationDeclEvent->systemId, true);
00531             }
00532         }
00533       else
00534         grammar->append(StructuralSymbols::EndOfBlock);
00535 
00536       if (xmlNotationDeclEvent->publicId)
00537         {
00538           if (!xmlNotationDeclEvent->publicId[0])
00539             {
00540               grammar->append(StructuralSymbols::EmptyString);
00541               grammar->append(StructuralSymbols::EndOfBlock);
00542             }
00543           else
00544             {
00545               APPEND_ALL_DATA(grammar, xmlNotationDeclEvent->publicId, true);
00546             }
00547         }
00548       else
00549         grammar->append(StructuralSymbols::EndOfBlock);
00550 
00551 
00552       break;
00553 
00554 
00555       /**** Start of element ****/
00556     case XmlModelEvent::StartElement:
00557       startElementEvent = (XmlStartElementEvent *)event;
00558 
      //attr is read as a null-terminated array of name/value pairs;
      //NOTE(review): attr itself is assumed to be non-null
00559       if (startElementEvent->attr[0])
00560         {
00561           //append the "attributes" symbol to the grammar
00562           //because the element has some attributes
00563           grammar->append(StructuralSymbols::Attributes);
00564 
00565           //encode the attributes
00566           for (size_t i = 0; startElementEvent->attr[i]; i += 2)
00567             {
00568               if (!(attrId = elementIds->find((XmlChar *)startElementEvent->attr[i])))
00569                 {
00570                   //unknown attribute started
00571                   //--> we'll create new element context for it
00572 
00573 
00574 
                  //keep a private copy of the name; elementNamesList
                  //is the container that the name is registered with
00575                   NEW(attrName, XmlChar[xmlchar_strlen(startElementEvent->attr[i]) + 1]);
00576                   xmlchar_strcpy(attrName, startElementEvent->attr[i]);
00577                   elementNamesList->append(attrName);
00578       
00579 
00580                   //insert new element id to elementIds
00581                   attributeId = elementCounter;
00582                   elementIds->insert(attrName, new unsigned long(attributeId));
00583 
00584 
00585 
00586                   NEW(ec, SimpleElementContext);
00587 
00588                   //remember the name in the new context
00589                   ec->name = attrName;
00590 
00591 
00592                   //add new element context to the element hashtable
00593                   elements->insert(attributeId, ec);
00594 
00595 
00596                   //update the element counter
00597                   elementCounter++;
00598 
00599 
00600 
00601                   //append "new element" symbol to the grammar
00602                   grammar->append(StructuralSymbols::NewElement);
00603 
00604                   //append the name of the attribute to the grammar,
00605                   //followed by "end of block" (the last macro argument is true)
00606                   APPEND_ALL_DATA(grammar, attrName, true);
00607                 }
00608               else
00609                 {
00610                   //known attribute started
00611                   //--> look up its element context
00612                   ec = elements->find(*attrId);
00613                   CHECK_POINTER(ec);
00614 
00615                   attributeId = *attrId;
00616 
00617 
00618                   //append "known element" symbol to the grammar,
                  //followed by the Fibonacci code of the attribute id
00619                   grammar->append(StructuralSymbols::KnownElement);
00620                   fibItems = Fibonacci::encodeToBuffer(fibBuf, SIZEOF_XML_CHAR, attributeId);
00621                   APPEND_BUFFER_DATA(grammar, fibBuf, fibItems);
00622                 }
00623 
00624               //append the attribute value
              //NOTE(review): an empty value is written as "empty string"
              //WITHOUT a following "end of block", unlike the empty-string
              //cases of the other event types; confirm the decoder expects this
00625               if (startElementEvent->attr[i+1][0] == 0)
00626                 grammar->append(StructuralSymbols::EmptyString);
00627               else
00628                 APPEND_ALL_DATA(grammar, startElementEvent->attr[i+1], true);
00629             }
00630 
00631           //append "end of block" to close the attribute list; the symbol of the element itself follows below
00632           grammar->append(StructuralSymbols::EndOfBlock);
00633 
00634         }
00635 
00636       if (!(elId = elementIds->find((XmlChar *)startElementEvent->name)))
00637         {
00638           //unknown element started
00639           //--> we'll create new element context for it
00640 
00641 
00642 
00643           NEW(elName, XmlChar[xmlchar_strlen(startElementEvent->name) + 1]);
00644           xmlchar_strcpy(elName, startElementEvent->name);
00645           elementNamesList->append(elName);
00646       
00647 
00648           //insert new element id to elementIds
00649           elementId = elementCounter;
00650           elementIds->insert(elName, new unsigned long(elementId));
00651 
00652           //DBG("New element " << (const char *)startElementEvent->name << " (" << elementId << ") started");
00653 
00654 
00655           NEW(ec, SimpleElementContext);
00656 
00657           //remember the name in the new context
00658           ec->name = elName;
00659 
00660 
00661 
00662           //add new element context to the element hashtable
00663           elements->insert(elementId, ec);
00664 
00665 
00666           //update the element counter
00667           elementCounter++;
00668 
00669 
00670 
00671           //append "new element" symbol to the grammar
00672           grammar->append(StructuralSymbols::NewElement);
00673 
00674           //append the name of the element to the grammar,
00675           //followed by "end of block" (the last macro argument is true)
00676           APPEND_ALL_DATA(grammar, startElementEvent->name, true);
00677 
00678           //              DBG("NEW ELEMENT: "<< startElementEvent->name);
00679         }
00680       else
00681         {
00682           //known element started
00683           //--> look up its element context
00684           ec = elements->find(*elId);
00685           CHECK_POINTER(ec);
00686 
00687           elementId = *elId;
00688 
00689 
00690           //      DBG("KNOWN: ");
00691 
00692           //append "known element" symbol to the grammar,
          //followed by the Fibonacci code of the element id
00693           grammar->append(StructuralSymbols::KnownElement);
00694 
00695           fibItems = Fibonacci::encodeToBuffer(fibBuf, SIZEOF_XML_CHAR, elementId);
00696           APPEND_BUFFER_DATA(grammar, fibBuf, fibItems);
00697 
00698           //              DBG(startElementEvent->name << " (" << elementId << ") ITEMS: " << fibItems << " CODE: ");
00699           //              for (unsigned int uu = 0; uu < fibItems; uu++)
00700           //                {
00701           //                  DBG((unsigned int)(unsigned char)fibBuf[uu] << " ");
00702           //                }
00703         }
00704 
00705       //push the element id to the stack
00706       elementStack->push(new unsigned long(elementId));
00707 
00708       break;
00709 
00710 
00711 
00712       /**** End of element ****/
00713     case XmlModelEvent::EndElement:
00714       endElementEvent = (XmlEndElementEvent *)event;
00715 
00716       //SimpleElementContext *ec;
00717 
00718       if (!(elId = elementIds->find((XmlChar *)endElementEvent->name)))
00719         {
00720           //end of unknown element occurred
00721           //--> should never happen
00722           FATAL("End of unknown element acurred: " << (const char *)endElementEvent->name);
00723         }
00724       else
00725         {
00726           //known element ended
00727 
00728           //      DBG("  Known element " << (const char *)endElementEvent->name << " ended");
00729         }
00730 
      //the popped id is not used; the element name is not written either,
      //a single "end element" symbol is enough
00731       elementStack->pop();
00732 
00733       //append "end element" symbol to the grammar
00734       grammar->append(StructuralSymbols::EndElement);
00735 
00736       //APPEND_ALL_DATA(grammar, endElementEvent->name);
00737       break;
00738 
00739 
00740 
00741       /**** Character data ****/
00742     case XmlModelEvent::Characters:
00743       charactersEvent = (XmlCharactersEvent *)event;
00744 
00745       //        for (int t = 0; t < charactersEvent->length; t++)
00746       //        {
00747       //          DBG(charactersEvent->data[t]);
00748       //        }
      //character data is written without a leading structural symbol
00749       APPEND_ALL_DATA_LENGTH(grammar, charactersEvent->data, charactersEvent->length, true);
00750       break;
00751 
00752       /**** Default data ****/
00753     case XmlModelEvent::Default:
00754       defaultEvent = (XmlDefaultEvent *)event;
00755 
00756       //append "default" symbol to the grammar
00757       grammar->append(StructuralSymbols::Default);
00758       APPEND_ALL_DATA_LENGTH(grammar, defaultEvent->data, defaultEvent->length, true);
00759       break;
00760 
00761 
00762       /**** Comment ****/
00763     case XmlModelEvent::Comment:
00764       commentEvent = (XmlCommentEvent *)event;
00765 
00766       //append "comment" symbol to the grammar
00767       grammar->append(StructuralSymbols::Comment);
00768 
00769       APPEND_ALL_DATA(grammar, commentEvent->data, true);
00770       break;
00771 
00772 
00773       /**** StartCDATA ****/
00774     case XmlModelEvent::StartCDATA:
00775       //startCDATAEvent = (XmlStartCDATAEvent *)event;
00776 
00777       //append "CDATA" symbol to the grammar (marks the start of the section)
00778       grammar->append(StructuralSymbols::CDATA);
00779       break;
00780 
00781       /**** EndCDATA ****/
00782     case XmlModelEvent::EndCDATA:
00783       //endCDATAEvent = (XmlEndCDATAEvent *)event;
00784 
00785       //append "CDATA" symbol to the grammar (the same symbol marks the end)
00786       grammar->append(StructuralSymbols::CDATA);
00787       break;
00788 
00789 
00790 
00791       /**** Processing instruction ****/
00792     case XmlModelEvent::PI:
00793       piEvent = (XmlPIEvent *)event;
00794 
00795       //append "pi" symbol to the grammar
00796       grammar->append(StructuralSymbols::PI);
00797 
      //target followed by data; a missing target yields a single
      //"end of block" and the data is then not written at all
00798       if (piEvent->target)
00799         {
00800           //      DBG(piEvent->target);
00801           APPEND_ALL_DATA(grammar, piEvent->target, true);
00802 
00803           if (piEvent->data)
00804             {
00805               //              DBG(piEvent->data);
00806               APPEND_ALL_DATA(grammar, piEvent->data, true);
00807             }
00808           else
00809             grammar->append(StructuralSymbols::EndOfBlock);
00810         }
00811       else
00812         grammar->append(StructuralSymbols::EndOfBlock);
00813       
00814       break;
00815 
00816     default:
      //unknown event type: warn, release the event and report failure
00817       WRN("Unknown XML event: " << event->type);
00818       DELETE(event);
00819       return false;
00820     }
00821 
00822   //delete the event structure
00823   DELETE(event);
00824   return true;
00825 }
00826 
00827 
00828 
00835 void XmlSimpleModel::receiveData(XmlChar *data, size_t size)
00836 {
00837   bool stateChanged = false;
00838   DataQueueItem *dataQueueItem;
00839   SimpleElementContext *ec;
00840   XmlChar *elName;
00841   unsigned long elementId;
00842   unsigned long *elId;
00843 
00844   //some variables for working with Fibonacci codes
00845   unsigned char fibTmpChar, fibMask;
00846   bool fibFinished = false;
00847 
00848 
00849   if (!buffer)
00850     {
00851       NEW(buffer, XmlChar[XML_MODEL_BUFFER_DEFAULT_SIZE]);
00852       RESET_BUFFER;
00853     }
00854 
00855   for (size_t i = 0; i < size; i++)
00856     {
00857       //        DBG((unsigned int)data[i]);
00858       if (state != StructuralSymbols::KnownElement)
00859         {
00860           switch (data[i])
00861             {
00862               //case StructuralSymbols::EndOfBlock:
00863               //DBG("END OF BLOCK");
00864               //          state = StructuralSymbols::XmlDecl;
00865               //break;
00866 
00867             case StructuralSymbols::XmlDecl:
00868               //DBG("XML DECL");
00869 
00870               CHANGE_STATE(StructuralSymbols::XmlDecl);
00871               if (stateChanged)
00872                 {
00873                   continue;
00874                 }
00875               break;
00876 
00877             case StructuralSymbols::Attributes:
00878               //DBG("ATTRIBUTES");
00879               elementHasAttributes = true;
00880 
00881               //          if (!inAttributeSection)
00882               //            {
00883               //              //attribute section ended
00884               //              RESET_BUFFER;
00885               //              RESET_STATE;
00886 
00887               //              //emit startElementEvent
00888               //              //              SAFE_CALL_EMITTER();
00889               //            }
00890               continue;
00891               break;
00892 
00893             case StructuralSymbols::NewElement:
00894               //DBG("NEW ELEMENT");
00895 
00896               CHANGE_STATE(StructuralSymbols::NewElement);
00897               if (stateChanged)
00898                 {
00899                   continue;
00900                 }
00901               break;
00902 
00903             case StructuralSymbols::KnownElement:
00904               //DBG("KNOWN ELEMENT");
00905 
00906               CHANGE_STATE(StructuralSymbols::KnownElement);
00907               if (stateChanged)
00908                 {
00909                   continue;
00910                 }
00911               break;
00912 
00913             case StructuralSymbols::EndElement:
00914               //DBG("END ELEMENT: ");
00915 
00916               CHANGE_STATE(StructuralSymbols::None);
00917 
00918               if (stateChanged)
00919                 {
00920                   elId = elementStack->pop();
00921                   CHECK_POINTER(elId);
00922                   ec = elements->find(*elId);
00923 
00924                   //DBG("** " << ec->name);
00925               
00926                   SAFE_CALL_EMITTER(endElement(userData, ec->name));
00927                   continue;
00928                 }
00929               else
00930                 {
00931                   //DBG("!!!!!!!!! " << state);
00932                   //DBG((unsigned int)data[i]);
00933                 }
00934 
00935               break;
00936 
00937             case StructuralSymbols::Default:
00938               //DBG("COMMENT");
00939               CHANGE_STATE(StructuralSymbols::Default);
00940               if (stateChanged)
00941                 {
00942                   continue;
00943                 }
00944               break;
00945 
00946 
00947             case StructuralSymbols::Comment:
00948               //DBG("COMMENT");
00949               CHANGE_STATE(StructuralSymbols::Comment);
00950               if (stateChanged)
00951                 {
00952                   continue;
00953                 }
00954               break;
00955 
00956             case StructuralSymbols::CDATA:
00957               //DBG("CDATA");
00958               if (!inCDATA)
00959                 {
00960                   CHANGE_STATE(StructuralSymbols::CDATA);
00961                   if (stateChanged)
00962                     {
00963                       SAFE_CALL_EMITTER(startCDATASection(userData));
00964                       inCDATA = true;
00965                       continue;
00966                     }
00967                 }
00968               else
00969                 {
00970                   CHANGE_STATE(StructuralSymbols::None);
00971                   if (stateChanged)
00972                     {
00973                       SAFE_CALL_EMITTER(endCDATASection(userData));
00974                       inCDATA = false;
00975                       continue;
00976                     }
00977                 }
00978               break;
00979 
00980             case StructuralSymbols::PI:
00981               //          DBG("PI");
00982               CHANGE_STATE(StructuralSymbols::PI);
00983               if (stateChanged)
00984                 {
00985                   continue;
00986                 }
00987               break;
00988 
00989             case StructuralSymbols::EntityDecl:
00990               //          DBG("ENTITY DECL");
00991               CHANGE_STATE(StructuralSymbols::EntityDecl);
00992               if (stateChanged)
00993                 {
00994                   continue;
00995                 }
00996               break;
00997 
00998             case StructuralSymbols::NotationDecl:
00999               //          DBG("NOTATION DECL");
01000               CHANGE_STATE(StructuralSymbols::NotationDecl);
01001               if (stateChanged)
01002                 {
01003                   continue;
01004                 }
01005               break;
01006 
01007             case StructuralSymbols::Doctype:
01008               //          DBG("DOCTYPE");
01009               if (!inDoctype)
01010                 {
01011                   CHANGE_STATE(StructuralSymbols::Doctype);
01012                   inDoctype = true;
01013 
01014                   if (stateChanged)
01015                     {
01016                       continue;
01017                     }
01018                 }
01019               else
01020                 {
01021                   SAFE_CALL_EMITTER(endDoctypeDecl(userData));
01022                   inDoctype = false;
01023                   continue;
01024                 }
01025               break;
01026 
01027             default:
01028               if (!data[i] && !bufferLength && elementHasAttributes)
01029                 {
01030                   //end of the attribute section of the element
01031                   attributeListComplete = true;
01032                   continue;
01033                 }
01034             }
01035         }
01036 
01037       switch (state)
01038         {
01039           //        case StructuralSymbols::EndOfBlock:
01040           //          //state = StructuralSymbols::None;
01041           //          break;
01042           
01043         case StructuralSymbols::XmlDecl:
01044           //DBG("comparing: " << (int)data[i]);
01045           if (data[i] == StructuralSymbols::StandaloneYes || data[i] == StructuralSymbols::StandaloneNo
01046               || (data[i] == StructuralSymbols::StandaloneNotSpecified && dataQueue->count() == 2))
01047             {
01048               //DBG("!!!!");
01049               //read standalone information
01050               XmlChar *version;
01051               XmlChar *encoding;
01052               int standalone;
01053 
01054               switch(data[i])
01055                 {
01056                 case StructuralSymbols::StandaloneYes:
01057                   standalone = 1;
01058                   break;
01059 
01060                 case StructuralSymbols::StandaloneNo:
01061                   standalone = 0;
01062                   break;
01063 
01064                 default:
01065                   standalone = -1;
01066                 }
01067 
01068               version = dataQueue->dequeue()->data;
01069               encoding = dataQueue->dequeue()->data;
01070 
01071               SAFE_CALL_EMITTER(xmlDecl(userData, version, encoding, standalone));
01072 
01073               if (version)
01074                 DELETE(version);
01075 
01076               if (encoding)
01077                 DELETE(encoding);
01078 
01079               RESET_STATE;
01080               RESET_BUFFER;
01081             }
01082           else
01083             if (data[i] == StructuralSymbols::EndOfBlock)
01084               {
01085                 //DBG("@");
01086                 //read version or encoding
01087                 NEW(dataQueueItem, DataQueueItem);
01088                 dataQueueItem->type = Characters;
01089 
01090                 if (bufferLength)
01091                   {
01092                     buffer[bufferLength] = 0;
01093                     NEW(dataQueueItem->data, XmlChar[xmlchar_strlen(buffer)+1]);
01094                     xmlchar_strcpy(dataQueueItem->data, buffer);
01095                   }
01096                 else
01097                   //data not present (can occur for encoding)
01098                   dataQueueItem->data = 0;
01099 
01100                 dataQueue->enqueue(dataQueueItem);
01101 
01102                 RESET_BUFFER;
01103 
01104               }
01105             else
01106               {
01107                 //read next character of xml declaration data
01108                 buffer[bufferLength] = data[i];
01109                 bufferLength++;
01110               }
01111           break;
01112 
01113         case StructuralSymbols::PI:
01114           if (data[i] == StructuralSymbols::EndOfBlock && dataQueue->count() == 1)
01115             {
01116               XmlChar *target;
01117 
01118               target = dataQueue->dequeue()->data;
01119 
01120               buffer[bufferLength] = 0;
01121               //value was specified
01122               SAFE_CALL_EMITTER(processingInstruction(userData, target, buffer));
01123 
01124               if (target)
01125                 DELETE(target);
01126 
01127               RESET_BUFFER;
01128               RESET_STATE;
01129             }
01130           else
01131             if (data[i] == StructuralSymbols::EndOfBlock)
01132               {
01133                 //read target value
01134                 NEW(dataQueueItem, DataQueueItem);
01135                 dataQueueItem->type = Characters;
01136 
01137                 if (bufferLength)
01138                   {
01139                     buffer[bufferLength] = 0;
01140                     NEW(dataQueueItem->data, XmlChar[xmlchar_strlen(buffer)+1]);
01141                     xmlchar_strcpy(dataQueueItem->data, buffer);
01142                   }
01143                 else
01144                   //data not present (can occur for encoding)
01145                   dataQueueItem->data = 0;
01146 
01147                 dataQueue->enqueue(dataQueueItem);
01148 
01149                 RESET_BUFFER;
01150 
01151               }
01152             else
01153               {
01154                 //read next character of xml target or value data
01155                 buffer[bufferLength] = data[i];
01156                 bufferLength++;
01157               }
01158           break;
01159 
01160         case StructuralSymbols::NewElement:
01161           if (data[i] == StructuralSymbols::EndOfBlock)
01162             {
01163               //decoding new element
01164 
01165 
01166               //add ending \0
01167               buffer[bufferLength] = 0;
01168               bufferLength++;
01169 
01170               //save element name
01171               NEW(elName, XmlChar[bufferLength]);
01172               xmlchar_strcpy(elName, buffer);
01173               elementNamesList->append(elName);
01174 
01175               //create an  element contex for the new element
01176               NEW(ec, SimpleElementContext);
01177                   
01178               //set unique element id
01179               ec->name = elName;
01180                   
01181               elementId = elementCounter;
01182                   
01183               //add new element context to the element hashtable
01184 
01185               elements->insert(elementCounter, ec);
01186                   
01187               //update the element counter
01188               elementCounter++;
01189                   
01190 
01191               FINISH_ELEMENT_START(elName);
01192 
01193               //reset buffer
01194               RESET_BUFFER;
01195             }
01196           else
01197             {
01198               //read next character of the incomplete element name
01199               buffer[bufferLength] = data[i];
01200               bufferLength++;
01201             }
01202           break;
01203 
01204         case StructuralSymbols::KnownElement:
01205           //known element started
01206           //--> read complete Fibonacci code of its id
01207 
01208           //construct reverse Fibonacci code (e.g. 1100101 instead of 1010011) from sequence of
01209           //following bytes
01210           fibMask = 1 << (SIZEOF_CHAR*8 - 1);
01211               
01212           //decode symbol using Fibonacci code
01213           fibTmpChar = (unsigned char)data[i];
01214           //DBG("SSS: " << (unsigned int)fibTmpChar);
01215 
01216           while (fibMask)
01217             {
01218               fibCode >>= 1;
01219               fibBits++;
01220                   
01221               if (fibTmpChar & fibMask)
01222                 {
01223                   fibCode = fibCode | ((unsigned long)1) << (SIZEOF_UNSIGNED_LONG*8-1);
01224                       
01225                   if (!fibLastWasOne)
01226                     fibLastWasOne = true;
01227                   else
01228                     {
01229                       //Fibonacci code is now complete
01230                       //--> decode the symbol number
01231                       fibCode >>= (SIZEOF_UNSIGNED_LONG*8 - fibBits);
01232                       elementId = Fibonacci::decode(fibCode);
01233                       fibFinished = true;
01234                       break;
01235                     }
01236                 }
01237               else
01238                 fibLastWasOne = false;
01239                   
01240               fibMask >>= 1;
01241             }
01242 
01243           if (fibFinished)
01244             {
01245               //DBG("FIB DECODED: " << elementId);
01246               ec = elements->find(elementId);
01247 
01248               CHECK_POINTER(ec);
01249 
01250 
01251               FINISH_ELEMENT_START(ec->name);
01252 
01253 
01254               //complete Fibonacci code read and decoded
01255               //reset fibCode, and fibBits
01256               fibCode = 0;
01257               fibBits = 0;
01258               fibLastWasOne = false;
01259 
01260               RESET_BUFFER;
01261               RESET_STATE;
01262             }
01263 
01264           break;
01265 
01266         case StructuralSymbols::Default:
01267           //comment data
01268           if (data[i] == StructuralSymbols::EndOfBlock)
01269             {
01270               //buffer[bufferLength] = data[i];
01271               SAFE_CALL_EMITTER(defaultHandler(userData, buffer, bufferLength));
01272               RESET_STATE;
01273               RESET_BUFFER;
01274             }
01275           else
01276             {
01277               //read next character of the default data
01278               buffer[bufferLength] = data[i];
01279               bufferLength++;
01280 
01281               if (bufferLength == XML_MODEL_BUFFER_DEFAULT_SIZE)
01282                 {
01283                   //data larger than the buffer --> split it
01284                   SAFE_CALL_EMITTER(defaultHandler(userData, buffer, bufferLength));
01285                   RESET_BUFFER;
01286                 }
01287             }
01288 
01289           break;
01290 
01291         case StructuralSymbols::Comment:
01292           //comment data
01293           if (data[i] == StructuralSymbols::EndOfBlock)
01294             {
01295               buffer[bufferLength] = 0;
01296               SAFE_CALL_EMITTER(comment(userData, buffer));
01297               RESET_STATE;
01298               RESET_BUFFER;
01299             }
01300           else
01301             {
01302               //read next character of the comment data
01303               buffer[bufferLength] = data[i];
01304               bufferLength++;
01305 
01306               if (bufferLength == XML_MODEL_BUFFER_DEFAULT_SIZE)
01307                 {
01308                   //data larger than the buffer --> split it
01309                   SAFE_CALL_EMITTER(comment(userData, buffer));
01310                   RESET_BUFFER;
01311                 }
01312             }
01313 
01314           break;
01315 
01316           //        case StructuralSymbols::PI:
01317           //          RESET_STATE;
01318           //          break;
01319 
01320         case StructuralSymbols::Doctype:
01321           if (/*data[i] == StructuralSymbols::EndOfBlock &&*/ dataQueue->count() == 3)
01322             {
01323               XmlChar *doctype, *publicId, *systemId;
01324 
01325               doctype = dataQueue->dequeue()->data;
01326               publicId = dataQueue->dequeue()->data;
01327               systemId = dataQueue->dequeue()->data;
01328 
01329               SAFE_CALL_EMITTER(startDoctypeDecl(userData, doctype, systemId, publicId,
01330                                                  data[0] == StructuralSymbols::HasInternalSubsetYes));
01331 
01332               if (doctype)
01333                 DELETE(doctype);
01334 
01335               if (publicId)
01336                 DELETE(publicId);
01337 
01338               if (systemId)
01339                 DELETE(systemId);
01340 
01341               RESET_BUFFER;
01342               RESET_STATE;
01343             }
01344           else
01345             if (data[i] == StructuralSymbols::EndOfBlock)
01346               {
01347                 //read target value
01348                 NEW(dataQueueItem, DataQueueItem);
01349                 dataQueueItem->type = Characters;
01350 
01351                 if (bufferLength)
01352                   {
01353                     if (buffer[0] == StructuralSymbols::EmptyString)
01354                       //we read an empty string
01355                       buffer[0] = 0;
01356                     else
01357                       buffer[bufferLength] = 0;
01358 
01359                     NEW(dataQueueItem->data, XmlChar[xmlchar_strlen(buffer)+1]);
01360                     xmlchar_strcpy(dataQueueItem->data, buffer);
01361                   }
01362                 else
01363                   //data not present
01364                   dataQueueItem->data = 0;
01365 
01366                 dataQueue->enqueue(dataQueueItem);
01367 
01368                 RESET_BUFFER;
01369               }
01370             else
01371               {
01372                 //read next character
01373                 buffer[bufferLength] = data[i];
01374                 bufferLength++;
01375               }
01376           break;
01377 
01378 
01379         case StructuralSymbols::EntityDecl:
01380           if (dataQueue->count() == 5)  //minus base!
01381             {
01382               //readind isparameterentity (the last record)
01383               XmlChar *entityName, *value, /* *base,*/ *publicId, *systemId, *notationName;
01384               int valueLength;
01385 
01386               entityName = dataQueue->dequeue()->data;
01387               value = dataQueue->dequeue()->data;
01388               //              base = dataQueue->dequeue()->data;
01389               systemId = dataQueue->dequeue()->data;
01390               publicId = dataQueue->dequeue()->data;
01391               notationName = dataQueue->dequeue()->data;
01392 
01393 
01394               if (!value)
01395                 valueLength = 0;
01396               else
01397                 valueLength = xmlchar_strlen(value);
01398 
01399               SAFE_CALL_EMITTER(entityDecl(userData, entityName, data[0] == StructuralSymbols::IsParameterEntityYes,
01400                                            value, valueLength, 0/*base*/, systemId, publicId, notationName));
01401 
01402               if (entityName)
01403                 DELETE(entityName);
01404 
01405               if (value)
01406                 DELETE(value);
01407 
01408               //              if (base)
01409               //                DELETE(base);
01410 
01411               if (systemId)
01412                 DELETE(systemId);
01413 
01414               if (publicId)
01415                 DELETE(publicId);
01416 
01417               if (notationName)
01418                 DELETE(notationName);
01419 
01420               RESET_BUFFER;
01421               RESET_STATE;
01422             }
01423           else
01424             if (data[i] == StructuralSymbols::EndOfBlock)
01425               {
01426                 //read target value
01427                 NEW(dataQueueItem, DataQueueItem);
01428                 dataQueueItem->type = Characters;
01429 
01430                 if (bufferLength)
01431                   {
01432                     if (buffer[0] == StructuralSymbols::EmptyString)
01433                       //we read an empty string
01434                       buffer[0] = 0;
01435                     else
01436                       buffer[bufferLength] = 0;
01437 
01438                     NEW(dataQueueItem->data, XmlChar[xmlchar_strlen(buffer)+1]);
01439                     xmlchar_strcpy(dataQueueItem->data, buffer);
01440                   }
01441                 else
01442                   //data not present
01443                   dataQueueItem->data = 0;
01444 
01445                 dataQueue->enqueue(dataQueueItem);
01446 
01447                 RESET_BUFFER;
01448               }
01449             else
01450               {
01451                 //read next character
01452                 buffer[bufferLength] = data[i];
01453                 bufferLength++;
01454               }
01455 
01456           break;
01457 
01458         case StructuralSymbols::NotationDecl:
01459           if (data[i] == StructuralSymbols::EndOfBlock)
01460             {
01461               //read target value
01462               NEW(dataQueueItem, DataQueueItem);
01463               dataQueueItem->type = Characters;
01464               
01465               if (bufferLength)
01466                 {
01467                   if (buffer[0] == StructuralSymbols::EmptyString)
01468                     //we read an empty string
01469                     buffer[0] = 0;
01470                   else
01471                     buffer[bufferLength] = 0;
01472                   
01473                   NEW(dataQueueItem->data, XmlChar[xmlchar_strlen(buffer)+1]);
01474                   xmlchar_strcpy(dataQueueItem->data, buffer);
01475                 }
01476               else
01477                 //data not present
01478                 dataQueueItem->data = 0;
01479               
01480               dataQueue->enqueue(dataQueueItem);
01481               
01482               RESET_BUFFER;
01483 
01484               //complete information read?
01485               if (dataQueue->count() == 3)      //minus base!
01486                 {
01487                   //readind isparameterentity (the last record)
01488                   XmlChar *notationName, /* *base,*/ *publicId, *systemId;
01489                   
01490                   notationName = dataQueue->dequeue()->data;
01491                   //          base = dataQueue->dequeue()->data;
01492                   systemId = dataQueue->dequeue()->data;
01493                   publicId = dataQueue->dequeue()->data;
01494                   
01495                   SAFE_CALL_EMITTER(notationDecl(userData, notationName, 0/*base*/, systemId, publicId));
01496 
01497                   if (notationName)
01498                     DELETE(notationName);
01499 
01500                   //              if (base)
01501                   //                DELETE(base);
01502 
01503                   if (systemId)
01504                     DELETE(systemId);
01505                   
01506                   if (publicId)
01507                     DELETE(publicId);
01508                   
01509                   if (notationName)
01510                     DELETE(notationName);
01511 
01512                   RESET_STATE;
01513                 }
01514             }
01515           else
01516             {
01517               //read next character
01518               buffer[bufferLength] = data[i];
01519               bufferLength++;
01520             }
01521 
01522           break;
01523 
01524         default:
01525           //characters
01526           if (data[i] == StructuralSymbols::EmptyString)
01527             {
01528               //empty string is ignored unless it's an empty value of an attribute
01529               if (elementHasAttributes)
01530                 {
01531                   //read a value of an attribute
01532                   XmlChar *attrValue;
01533                   
01534                   //copy the value from buffer to attrValue
01535                   NEW(attrValue, XmlChar[1]);
01536                   attrValue[0] = 0;
01537                   
01538                   NEW(dataQueueItem, DataQueueItem);
01539                   dataQueueItem->type = Characters;
01540                   dataQueueItem->data = attrValue;
01541                   dataQueue->enqueue(dataQueueItem);
01542                 }
01543             }
01544           else
01545             if (data[i] == StructuralSymbols::EndOfBlock)
01546               {
01547                 if (elementHasAttributes)
01548                   {
01549                     //read a value of an attribute
01550                     XmlChar *attrValue;
01551 
01552                     //copy the value from buffer to attrValue
01553                     NEW(attrValue, XmlChar[bufferLength + 1]);
01554                     buffer[bufferLength] = 0;
01555                     xmlchar_strcpy(attrValue, buffer);
01556 
01557                     NEW(dataQueueItem, DataQueueItem);
01558                     dataQueueItem->type = Characters;
01559                     dataQueueItem->data = attrValue;
01560                     dataQueue->enqueue(dataQueueItem);
01561                   }
01562                 else
01563                   {
01564                     if (bufferLength)
01565                       //send data only if they aren't empty
01566                       SAFE_CALL_EMITTER(characterData(userData, buffer, bufferLength));
01567                   }
01568 
01569                 RESET_BUFFER;
01570                 RESET_STATE;
01571               }
01572             else
01573               {
01574                 buffer[bufferLength] = data[i];
01575                 bufferLength++;
01576                   
01577                 if (bufferLength == XML_MODEL_BUFFER_DEFAULT_SIZE)
01578                   {
01579                     //character data larger than the buffer --> split it
01580                     SAFE_CALL_EMITTER(characterData(userData, buffer, bufferLength));
01581                     RESET_BUFFER;
01582                   }
01583               }
01584         }
01585     }
01586 }

Generated on Wed Feb 5 10:43:03 2003 for Exalt by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002