Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

xmlparser.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002     xmlparser.cpp  -  Definitions of class XmlParser methods
00003                              -------------------
00004     begin                : June 21 2002
00005     copyright            : (C) 2003 by Vojtěch Toman
00006     email                : vtoman@lit.cz
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00018 
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030 
00031 
00032 #include "xmlparser.h"
00033 
00034 
00035 
00039 
00040 
00041 
00042 static void startElementHandler_wrapper(void *userData, const XmlChar *el, const XmlChar **attr)
00043 {
00044   ParseInfo *inf = (ParseInfo *)userData;
00045   
00046   inf->parser->startElement(userData, el, attr);
00047 }
00048 
00049 
00050 static void endElementHandler_wrapper(void *userData, const XmlChar *el)
00051 {
00052   ParseInfo *inf = (ParseInfo *)userData;
00053   
00054   inf->parser->endElement(userData, el);
00055 }
00056 
00057 
00058 static void characterDataHandler_wrapper(void *userData,
00059                                          const XmlChar *data,
00060                                          int length)
00061 {
00062   ParseInfo *inf = (ParseInfo *)userData;
00063   
00064   inf->parser->characterData(userData, data, length);
00065 }
00066 
00067 static void processingInstructionHandler_wrapper(void *userData,
00068                                                  const XmlChar *target,
00069                                                  const XmlChar *data)
00070 {
00071   ParseInfo *inf = (ParseInfo *)userData;
00072   
00073   inf->parser->processingInstruction(userData, target, data);
00074 }
00075 
00076 static void commentHandler_wrapper(void *userData,
00077                                    const XmlChar *data)
00078 {
00079   ParseInfo *inf = (ParseInfo *)userData;
00080   
00081   inf->parser->comment(userData, data);
00082 }
00083 
00084 static void startCDATASectionHandler_wrapper(void *userData)
00085 {
00086   ParseInfo *inf = (ParseInfo *)userData;
00087   
00088   inf->parser->startCDATASection(userData);
00089 }
00090 
00091 
00092 static void endCDATASectionHandler_wrapper(void *userData)
00093 {
00094   ParseInfo *inf = (ParseInfo *)userData;
00095   
00096   inf->parser->endCDATASection(userData);
00097 }
00098 
00099 static void defaultHandler_wrapper(void *userData,
00100                                    const XmlChar *data,
00101                                    int length)
00102 {
00103   ParseInfo *inf = (ParseInfo *)userData;
00104   
00105   inf->parser->defaultHandler(userData, data, length);
00106 }
00107 
00108 static int unknownEncodingHandler_wrapper(void *unknownEncodingData,
00109                                           const XmlChar *name,
00110                                           XML_Encoding *info)
00111 {
00112   ParseInfo *inf = (ParseInfo *)unknownEncodingData;
00113   
00114   return inf->parser->unknownEncoding(unknownEncodingData, name, info);
00115 }
00116 
00117 static int convert_wrapper(void *data, const char *s)
00118 {
00119   EncodingInfo *inf = (EncodingInfo *)data;
00120 
00121   return inf->textCodec->convert(s, inf->mib);
00122 }
00123 
00124 static void release_wrapper(void *data)
00125 {
00126   EncodingInfo *inf = (EncodingInfo *)data;
00127 
00128   inf->textCodec->release(inf->mib);
00129 }
00130 
00131 static void xmlDeclHandler_wrapper(void *userData,
00132                                    const XmlChar *version,
00133                                    const XmlChar *encoding,
00134                                    int standalone)
00135 {
00136   ParseInfo *inf = (ParseInfo *)userData;
00137   
00138   inf->parser->xmlDecl(userData, version, encoding, standalone);
00139 }
00140 
00141 static void startDoctypeDeclHandler_wrapper(void *userData,
00142                                             const XmlChar *doctypeName,
00143                                             const XmlChar *systemId,
00144                                             const XmlChar *publicId,
00145                                             int hasInternalSubset)
00146 {
00147   ParseInfo *inf = (ParseInfo *)userData;
00148   
00149   inf->parser->startDoctypeDecl(userData, doctypeName, systemId, publicId, hasInternalSubset);
00150 }
00151 
00152 static void endDoctypeDeclHandler_wrapper(void *userData)
00153 {
00154   ParseInfo *inf = (ParseInfo *)userData;
00155   
00156   inf->parser->endDoctypeDecl(userData);
00157 }
00158 
00159 
00160 static void entityDeclHandler_wrapper(void *userData,
00161                                       const XmlChar *entityName,
00162                                       int isParameterEntity,
00163                                       const XmlChar *value,
00164                                       int valueLength,
00165                                       const XmlChar *base,
00166                                       const XmlChar *systemId,
00167                                       const XmlChar *publicId,
00168                                       const XmlChar *notationName)
00169 {
00170   ParseInfo *inf = (ParseInfo *)userData;
00171   
00172   inf->parser->entityDecl(userData, entityName, isParameterEntity, value, valueLength, base, systemId, publicId, notationName);
00173 }
00174 
00175 
00176 static void notationDeclHandler_wrapper(void *userData,
00177                                         const XmlChar *notationName,
00178                                         const XmlChar *base,
00179                                         const XmlChar *systemId,
00180                                         const XmlChar *publicId)
00181 {
00182   ParseInfo *inf = (ParseInfo *)userData;
00183   
00184   inf->parser->notationDecl(userData, notationName, base, systemId, publicId);
00185 }
00186 
00187 
00188 
00189 
00193 
00194 
00195 
00199 XmlParser::XmlParser(void)
00200   : XmlParserBase()
00201 { 
00202   initParser(0);
00203 }
00204 
00205 
00211 XmlParser::XmlParser(const XmlChar *encoding)
00212   : XmlParserBase(encoding)
00213 {
00214   initParser(encoding);
00215 }
00216 
00217 
00223 void XmlParser::initParser(const XmlChar *encoding)
00224 {
00225   //create an expat parser
00226   innerParser = XML_ParserCreate(encoding);
00227   CHECK_POINTER(innerParser);
00228 
00229 
00230   //set user data
00231   ParseInfo *parseInfo;
00232   NEW(parseInfo, ParseInfo);
00233   parseInfo->parser = this;
00234   XML_SetUserData(innerParser, parseInfo);
00235 
00236   
00237   //and now... set all the callbacks
00238   XML_SetStartElementHandler(innerParser, startElementHandler_wrapper);
00239   XML_SetEndElementHandler(innerParser, endElementHandler_wrapper);
00240   XML_SetCharacterDataHandler(innerParser, characterDataHandler_wrapper);
00241   XML_SetProcessingInstructionHandler(innerParser, processingInstructionHandler_wrapper);
00242   XML_SetCommentHandler(innerParser, commentHandler_wrapper);
00243   XML_SetStartCdataSectionHandler(innerParser, startCDATASectionHandler_wrapper);
00244   XML_SetEndCdataSectionHandler(innerParser, endCDATASectionHandler_wrapper);
00245   XML_SetDefaultHandler(innerParser, defaultHandler_wrapper);
00246   XML_SetUnknownEncodingHandler(innerParser, unknownEncodingHandler_wrapper, parseInfo);
00247   XML_SetXmlDeclHandler(innerParser, xmlDeclHandler_wrapper);
00248   XML_SetStartDoctypeDeclHandler(innerParser, startDoctypeDeclHandler_wrapper);
00249   XML_SetEndDoctypeDeclHandler(innerParser, endDoctypeDeclHandler_wrapper);
00250   XML_SetEntityDeclHandler(innerParser, entityDeclHandler_wrapper);
00251   XML_SetNotationDeclHandler(innerParser, notationDeclHandler_wrapper);
00252 //    XML_SetExternalEntityRefHandler(innerParser, externalEntityRefHandler_wrapper);
00253 //    XML_SetStartNamespaceDeclHandler(innerParser, startNamespaceDeclHandler_wrapper);
00254 //    XML_SetEndNamespaceDeclHandler(innerParser, endNamespaceDeclHandler_wrapper);
00255 //    XML_SetElementDeclHandler(innerParser, elementDeclHandler_wrapper);
00256 //    XML_SetAttlistDeclHandler(innerParser, attlistDeclHandler_wrapper);
00257 //    XML_SetNotStandaloneHandler(innerParser, notStandaloneHandler_wrapper);
00258 
00259 
00260   //disable parameter entity parsing
00261   XML_SetParamEntityParsing(innerParser, XML_PARAM_ENTITY_PARSING_NEVER);
00262 
00263   charactersTotal = 0;
00264 }
00265 
00266 
00267 
00271 XmlParser::~XmlParser(void)
00272 {
00273   //in verbose mode, display the statistics
00274   if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00275     {
00276       OUTPUTENL("Input document statistics");
00277       OUTPUTENL("  Character data size: \t\t\t" << charactersTotal);
00278     }
00279 
00280   XML_ParserFree(innerParser);
00281   deleteDefaultTextCodec();
00282 }
00283 
00284 
00285 
00291 long XmlParser::parse(void) throw (ExaltParserException, ExaltEncodingException, ExaltIOException)
00292 {
00293   bool final;
00294 
00295 
00296   if (!inputDevice)
00297     {
00298       FATAL("No input device has been specified!");
00299     }
00300   else
00301     {
00302       while (inputDevice->readData(xmlDataBuff, XML_BUFFER_SIZE) == ReadOk)
00303         {
00304           final = inputDevice->bytesRead() < XML_BUFFER_SIZE;
00305           
00306           if (!XML_Parse(innerParser, xmlDataBuff, inputDevice->bytesRead(), final))
00307             {
00308               reportError();
00309               return 0;
00310             }
00311           else
00312             if (final)
00313               {
00314                 return XML_GetCurrentByteIndex(innerParser);
00315               }
00316           
00317         }
00318       
00319       if (inputDevice->errorOccurred())
00320         FATAL("Error reading from input device!");
00321       
00322       if (!XML_Parse(innerParser, xmlDataBuff, inputDevice->bytesRead(), true))
00323         {
00324           reportError();
00325           return 0;
00326         }
00327       else
00328         {
00329           return XML_GetCurrentByteIndex(innerParser);
00330         }
00331     }
00332 
00333   return 0;
00334 }
00335 
00336 
00337 
00345 long XmlParser::parsePush(const char *data, int length, bool isFinal)
00346   throw (ExaltParserException, ExaltEncodingException, ExaltIOException)
00347 {
00348   if (!XML_Parse(innerParser, data, length, isFinal))
00349     {
00350       reportError();
00351       return 0;
00352     }
00353   else
00354     if (isFinal)
00355       {
00356         return XML_GetCurrentByteIndex(innerParser);
00357       }
00358 
00359   return 0;
00360 }
00361 
00362 
00363 
00364 
00365 
00369 
00370 
00371 
00375 void XmlParser::reportError(void)
00376 {
00377   PARSERERR("Line " << XML_GetCurrentLineNumber(innerParser) <<         \
00378             ", column " << XML_GetCurrentColumnNumber(innerParser) <<   \
00379             ": " << XML_ErrorString(XML_GetErrorCode(innerParser)));
00380 
00381   throw ExaltParseErrorException();
00382 }
00383 
00384 
00385 
00395 void XmlParser::startElement(void *userData, const XmlChar *name, const XmlChar **attr)
00396 {
00397   ParseInfo *parseInfo = (ParseInfo *)userData;
00398 
00399   parseInfo->depth++;
00400 
00401 #ifdef PRINT_SAX
00402   XmlChar *ee;
00403   NEW(ee, XmlChar[xmlchar_strlen(name)+10]);
00404 
00405   xmlchar_strcpy(ee, "<");
00406   xmlchar_strcat(ee, name);
00407   xmlchar_strcat(ee, ">");
00408 
00409   OUTPUT(ee);
00410 
00411   DELETE_ARRAY(ee);
00412 
00413   if (attr)
00414     if (attr[0])
00415       {
00416         OUTPUT(": ");
00417         for (int i = 0; attr[i]; i += 2)
00418           OUTPUT(attr[i] << " = " << attr[i + 1] << "   ");
00419       }
00420 #endif
00421 
00422 
00423 
00424   XmlStartElementEvent *ev;
00425   NEW(ev, XmlStartElementEvent(name, attr));
00426 
00427   exposeEvent(ev);
00428 }
00429 
00430 
00431 
00440 void XmlParser::endElement(void *userData,
00441                            const XmlChar *name)
00442 {
00443   ParseInfo *parseInfo = (ParseInfo *)userData;
00444 
00445 
00446   parseInfo->depth--;
00447 
00448 #ifdef PRINT_SAX  
00449   XmlChar *ee;
00450   NEW(ee, XmlChar[xmlchar_strlen(name)+10]);
00451 
00452   xmlchar_strcpy(ee, "</");
00453   xmlchar_strcat(ee, name);
00454   xmlchar_strcat(ee, ">");
00455   OUTPUT(ee);
00456 
00457   DELETE_ARRAY(ee);
00458 #endif
00459 
00460   XmlEndElementEvent *ev;
00461   NEW(ev, XmlEndElementEvent(name));
00462 
00463   exposeEvent(ev);
00464 }
00465 
00466 
00467 
00479 void XmlParser::characterData(void *userData,
00480                               const XmlChar *data,
00481                               int length)
00482 {
00483 #ifdef PRINT_SAX
00484   for (int i = 0; i < length; i++)
00485     OUTPUT(data[i]);
00486 #endif
00487 
00488   XmlCharactersEvent *ev;
00489   NEW(ev, XmlCharactersEvent(data, length));
00490 
00491   exposeEvent(ev);
00492 
00493   charactersTotal += length;
00494 }
00495 
00496 
00497 
00507 void XmlParser::processingInstruction(void *userData,
00508                                       const XmlChar *target,
00509                                       const XmlChar *data)
00510 {
00511 #ifdef PRINT_SAX  
00512   OUTPUT(target << " = " << data);
00513 #endif
00514 
00515   XmlPIEvent *ev;
00516   NEW(ev, XmlPIEvent(target, data));
00517 
00518   exposeEvent(ev);
00519 }
00520 
00521 
00522 
00531 void XmlParser::comment(void *userData,
00532                         const XmlChar *data)
00533 {
00534 #ifdef PRINT_SAX  
00535   OUTPUT(data);
00536 #endif
00537 
00538   XmlCommentEvent *ev;
00539   NEW(ev, XmlCommentEvent(data));
00540 
00541   exposeEvent(ev);
00542 }
00543 
00544 
00545 
00553 void XmlParser::startCDATASection(void *userData)
00554 {
00555 #ifdef PRINT_SAX  
00556   OUTPUT("start CDATA");
00557 #endif
00558 
00559   XmlStartCDATAEvent *ev;
00560   NEW(ev, XmlStartCDATAEvent);
00561 
00562   exposeEvent(ev);
00563 }
00564 
00565 
00566 
00574 void XmlParser::endCDATASection(void *userData)
00575 {
00576 #ifdef PRINT_SAX  
00577   OUTPUT("end CDATA");
00578 #endif
00579 
00580   XmlEndCDATAEvent *ev;
00581   NEW(ev, XmlEndCDATAEvent);
00582 
00583   exposeEvent(ev);
00584 }
00585 
00586 
00587 
00597 void XmlParser::defaultHandler(void *userData,
00598                                const XmlChar *data,
00599                                int length)
00600 {
00601 
00602 #ifdef PRINT_SAX
00603   for (int i = 0; i < length; i++)
00604     OUTPUT(data[i]);
00605   OUTPUT("DEFAULT");
00606 #endif
00607 
00608   XmlDefaultEvent *ev;
00609   NEW(ev, XmlDefaultEvent(data, length));
00610 
00611   exposeEvent(ev);
00612 }
00613 
00614 
00615 
00626 int XmlParser::unknownEncoding(void *unknownEncodingData,
00627                                const XmlChar *name,
00628                                XML_Encoding *info)
00629 {
00630 #ifdef PRINT_SAX  
00631   OUTPUT("Unknown encoding: " << name);
00632 #endif
00633 
00634   if (!textCodec)
00635     {
00636       //use default text codec, if none has been specified
00637       createDefaultTextCodec();
00638     }
00639 
00640   Encodings::MIB mib;
00641 
00642   try
00643     {
00644       mib = textCodec->getMIB(name);
00645     }
00646   catch (ExaltUnknownEncodingException)
00647     {
00648       //encoding not listed among textcodec's encodings
00649       return 0;
00650     }
00651 
00652   if (mib == Encodings::Unknown)
00653     {
00654       //encoding not listed among textcodec's encodings
00655       //catch block should handle this case, this test is
00656       //just for safety
00657       return 0;
00658     }
00659   
00660 
00661   if (!textCodec->isAbleToConvert(mib))
00662     {
00663       //textcodec knows the encoding, but is unable to convert it
00664       return 0;
00665     }
00666   else
00667     {
00668       //textcodec understands the encoding
00669       textCodec->fillInMapArray(info, mib);
00670       info->convert = convert_wrapper;
00671       info->release = release_wrapper;
00672 
00673       EncodingInfo *ei;
00674       NEW(ei, EncodingInfo);
00675 
00676       ei->textCodec = textCodec;
00677       ei->mib = mib;
00678 
00679       info->data = (void *)ei;
00680 
00681       return 1;
00682     }
00683 }
00684 
00685 
00686 
00697 void XmlParser::xmlDecl(void *userData,
00698                         const XmlChar *version,
00699                         const XmlChar *encoding,
00700                         int standalone)
00701 {
00702 #ifdef PRINT_SAX  
00703   OUTPUT("XML declaration: version = " << version << ", encoding = " << encoding << ", standalone = " << standalone);
00704 #endif
00705 
00706   XmlDeclEvent *ev;
00707   NEW(ev, XmlDeclEvent(version, encoding, standalone));
00708 
00709   exposeEvent(ev);
00710 
00711 }
00712 
00713 
00714 
00726 void XmlParser::startDoctypeDecl(void *userData,
00727                                  const XmlChar *doctypeName,
00728                                  const XmlChar *systemId,
00729                                  const XmlChar *publicId,
00730                                  int hasInternalSubset)
00731 {
00732 #ifdef PRINT_SAX  
00733   OUTPUT("Start Doctype: doctypeName = " << doctypeName << ", systemId = " << systemId << ", publicId = " << publicId << ", has internal subset = " << hasInternalSubset);
00734 #endif
00735 
00736   XmlStartDoctypeEvent *ev;
00737   NEW(ev, XmlStartDoctypeEvent(doctypeName, systemId, publicId, hasInternalSubset));
00738 
00739   exposeEvent(ev);
00740 }
00741 
00742 
00743 
00751 void XmlParser::endDoctypeDecl(void *userData)
00752 {
00753 #ifdef PRINT_SAX  
00754   OUTPUTNL("End doctype");
00755 #endif
00756 
00757   XmlEndDoctypeEvent *ev;
00758   NEW(ev, XmlEndDoctypeEvent());
00759 
00760   exposeEvent(ev);
00761 }
00762 
00763 
00764 
00780 void XmlParser::entityDecl(void *userData,
00781                            const XmlChar *entityName,
00782                            int isParameterEntity,
00783                            const XmlChar *value,
00784                            int valueLength,
00785                            const XmlChar *base,
00786                            const XmlChar *systemId,
00787                            const XmlChar *publicId,
00788                            const XmlChar *notationName)
00789 {
00790 #ifdef PRINT_SAX  
00791   OUTPUT("Entity declaration: name = " << entityName << ", isParameterEntity " << isParameterEntity << ", valueLength = " << valueLength << ", base = " << base << ", systemId = " << systemId << ", publicId = " << publicId << ", notationName = " << notationName);
00792 #endif
00793 
00794   XmlEntityDeclEvent *ev;
00795   NEW(ev, XmlEntityDeclEvent(entityName, isParameterEntity, value, valueLength, /* base, */ systemId, publicId, notationName));
00796 
00797   exposeEvent(ev);
00798 }
00799 
00800 
00812 void XmlParser::notationDecl(void *userData,
00813                              const XmlChar *notationName,
00814                              const XmlChar *base,
00815                              const XmlChar *systemId,
00816                              const XmlChar *publicId)
00817 {
00818 #ifdef PRINT_SAX  
00819   OUTPUT("Notation declaration: name = " << notationName << ", base = " << base << ", systemId = " << systemId << ", publicId = " << publicId);
00820 #endif
00821 
00822   XmlNotationDeclEvent *ev;
00823   NEW(ev, XmlNotationDeclEvent(notationName, /* base, */ systemId, publicId));
00824 
00825   exposeEvent(ev);
00826 }
00827 
00828 
00829 
00830 
00836 void XmlParser::exposeEvent(XmlModelEvent *ev)
00837 {
00838   if (xmlModel)
00839     xmlModel->manageEvent(ev);
00840   else
00841     WRN("No XML model has been specified for the parser!");
00842 }
00843 
00844 
00845 

Generated on Wed Feb 5 10:43:03 2003 for Exalt by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002