00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030
00031
00032 #include "xmlparser.h"
00033
00034
00035
00039
00040
00041
00042 static void startElementHandler_wrapper(void *userData, const XmlChar *el, const XmlChar **attr)
00043 {
00044 ParseInfo *inf = (ParseInfo *)userData;
00045
00046 inf->parser->startElement(userData, el, attr);
00047 }
00048
00049
00050 static void endElementHandler_wrapper(void *userData, const XmlChar *el)
00051 {
00052 ParseInfo *inf = (ParseInfo *)userData;
00053
00054 inf->parser->endElement(userData, el);
00055 }
00056
00057
00058 static void characterDataHandler_wrapper(void *userData,
00059 const XmlChar *data,
00060 int length)
00061 {
00062 ParseInfo *inf = (ParseInfo *)userData;
00063
00064 inf->parser->characterData(userData, data, length);
00065 }
00066
00067 static void processingInstructionHandler_wrapper(void *userData,
00068 const XmlChar *target,
00069 const XmlChar *data)
00070 {
00071 ParseInfo *inf = (ParseInfo *)userData;
00072
00073 inf->parser->processingInstruction(userData, target, data);
00074 }
00075
00076 static void commentHandler_wrapper(void *userData,
00077 const XmlChar *data)
00078 {
00079 ParseInfo *inf = (ParseInfo *)userData;
00080
00081 inf->parser->comment(userData, data);
00082 }
00083
00084 static void startCDATASectionHandler_wrapper(void *userData)
00085 {
00086 ParseInfo *inf = (ParseInfo *)userData;
00087
00088 inf->parser->startCDATASection(userData);
00089 }
00090
00091
00092 static void endCDATASectionHandler_wrapper(void *userData)
00093 {
00094 ParseInfo *inf = (ParseInfo *)userData;
00095
00096 inf->parser->endCDATASection(userData);
00097 }
00098
00099 static void defaultHandler_wrapper(void *userData,
00100 const XmlChar *data,
00101 int length)
00102 {
00103 ParseInfo *inf = (ParseInfo *)userData;
00104
00105 inf->parser->defaultHandler(userData, data, length);
00106 }
00107
00108 static int unknownEncodingHandler_wrapper(void *unknownEncodingData,
00109 const XmlChar *name,
00110 XML_Encoding *info)
00111 {
00112 ParseInfo *inf = (ParseInfo *)unknownEncodingData;
00113
00114 return inf->parser->unknownEncoding(unknownEncodingData, name, info);
00115 }
00116
00117 static int convert_wrapper(void *data, const char *s)
00118 {
00119 EncodingInfo *inf = (EncodingInfo *)data;
00120
00121 return inf->textCodec->convert(s, inf->mib);
00122 }
00123
00124 static void release_wrapper(void *data)
00125 {
00126 EncodingInfo *inf = (EncodingInfo *)data;
00127
00128 inf->textCodec->release(inf->mib);
00129 }
00130
00131 static void xmlDeclHandler_wrapper(void *userData,
00132 const XmlChar *version,
00133 const XmlChar *encoding,
00134 int standalone)
00135 {
00136 ParseInfo *inf = (ParseInfo *)userData;
00137
00138 inf->parser->xmlDecl(userData, version, encoding, standalone);
00139 }
00140
00141 static void startDoctypeDeclHandler_wrapper(void *userData,
00142 const XmlChar *doctypeName,
00143 const XmlChar *systemId,
00144 const XmlChar *publicId,
00145 int hasInternalSubset)
00146 {
00147 ParseInfo *inf = (ParseInfo *)userData;
00148
00149 inf->parser->startDoctypeDecl(userData, doctypeName, systemId, publicId, hasInternalSubset);
00150 }
00151
00152 static void endDoctypeDeclHandler_wrapper(void *userData)
00153 {
00154 ParseInfo *inf = (ParseInfo *)userData;
00155
00156 inf->parser->endDoctypeDecl(userData);
00157 }
00158
00159
00160 static void entityDeclHandler_wrapper(void *userData,
00161 const XmlChar *entityName,
00162 int isParameterEntity,
00163 const XmlChar *value,
00164 int valueLength,
00165 const XmlChar *base,
00166 const XmlChar *systemId,
00167 const XmlChar *publicId,
00168 const XmlChar *notationName)
00169 {
00170 ParseInfo *inf = (ParseInfo *)userData;
00171
00172 inf->parser->entityDecl(userData, entityName, isParameterEntity, value, valueLength, base, systemId, publicId, notationName);
00173 }
00174
00175
00176 static void notationDeclHandler_wrapper(void *userData,
00177 const XmlChar *notationName,
00178 const XmlChar *base,
00179 const XmlChar *systemId,
00180 const XmlChar *publicId)
00181 {
00182 ParseInfo *inf = (ParseInfo *)userData;
00183
00184 inf->parser->notationDecl(userData, notationName, base, systemId, publicId);
00185 }
00186
00187
00188
00189
00193
00194
00195
00199 XmlParser::XmlParser(void)
00200 : XmlParserBase()
00201 {
00202 initParser(0);
00203 }
00204
00205
00211 XmlParser::XmlParser(const XmlChar *encoding)
00212 : XmlParserBase(encoding)
00213 {
00214 initParser(encoding);
00215 }
00216
00217
00223 void XmlParser::initParser(const XmlChar *encoding)
00224 {
00225
00226 innerParser = XML_ParserCreate(encoding);
00227 CHECK_POINTER(innerParser);
00228
00229
00230
00231 ParseInfo *parseInfo;
00232 NEW(parseInfo, ParseInfo);
00233 parseInfo->parser = this;
00234 XML_SetUserData(innerParser, parseInfo);
00235
00236
00237
00238 XML_SetStartElementHandler(innerParser, startElementHandler_wrapper);
00239 XML_SetEndElementHandler(innerParser, endElementHandler_wrapper);
00240 XML_SetCharacterDataHandler(innerParser, characterDataHandler_wrapper);
00241 XML_SetProcessingInstructionHandler(innerParser, processingInstructionHandler_wrapper);
00242 XML_SetCommentHandler(innerParser, commentHandler_wrapper);
00243 XML_SetStartCdataSectionHandler(innerParser, startCDATASectionHandler_wrapper);
00244 XML_SetEndCdataSectionHandler(innerParser, endCDATASectionHandler_wrapper);
00245 XML_SetDefaultHandler(innerParser, defaultHandler_wrapper);
00246 XML_SetUnknownEncodingHandler(innerParser, unknownEncodingHandler_wrapper, parseInfo);
00247 XML_SetXmlDeclHandler(innerParser, xmlDeclHandler_wrapper);
00248 XML_SetStartDoctypeDeclHandler(innerParser, startDoctypeDeclHandler_wrapper);
00249 XML_SetEndDoctypeDeclHandler(innerParser, endDoctypeDeclHandler_wrapper);
00250 XML_SetEntityDeclHandler(innerParser, entityDeclHandler_wrapper);
00251 XML_SetNotationDeclHandler(innerParser, notationDeclHandler_wrapper);
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261 XML_SetParamEntityParsing(innerParser, XML_PARAM_ENTITY_PARSING_NEVER);
00262
00263 charactersTotal = 0;
00264 }
00265
00266
00267
00271 XmlParser::~XmlParser(void)
00272 {
00273
00274 if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00275 {
00276 OUTPUTENL("Input document statistics");
00277 OUTPUTENL(" Character data size: \t\t\t" << charactersTotal);
00278 }
00279
00280 XML_ParserFree(innerParser);
00281 deleteDefaultTextCodec();
00282 }
00283
00284
00285
00291 long XmlParser::parse(void) throw (ExaltParserException, ExaltEncodingException, ExaltIOException)
00292 {
00293 bool final;
00294
00295
00296 if (!inputDevice)
00297 {
00298 FATAL("No input device has been specified!");
00299 }
00300 else
00301 {
00302 while (inputDevice->readData(xmlDataBuff, XML_BUFFER_SIZE) == ReadOk)
00303 {
00304 final = inputDevice->bytesRead() < XML_BUFFER_SIZE;
00305
00306 if (!XML_Parse(innerParser, xmlDataBuff, inputDevice->bytesRead(), final))
00307 {
00308 reportError();
00309 return 0;
00310 }
00311 else
00312 if (final)
00313 {
00314 return XML_GetCurrentByteIndex(innerParser);
00315 }
00316
00317 }
00318
00319 if (inputDevice->errorOccurred())
00320 FATAL("Error reading from input device!");
00321
00322 if (!XML_Parse(innerParser, xmlDataBuff, inputDevice->bytesRead(), true))
00323 {
00324 reportError();
00325 return 0;
00326 }
00327 else
00328 {
00329 return XML_GetCurrentByteIndex(innerParser);
00330 }
00331 }
00332
00333 return 0;
00334 }
00335
00336
00337
00345 long XmlParser::parsePush(const char *data, int length, bool isFinal)
00346 throw (ExaltParserException, ExaltEncodingException, ExaltIOException)
00347 {
00348 if (!XML_Parse(innerParser, data, length, isFinal))
00349 {
00350 reportError();
00351 return 0;
00352 }
00353 else
00354 if (isFinal)
00355 {
00356 return XML_GetCurrentByteIndex(innerParser);
00357 }
00358
00359 return 0;
00360 }
00361
00362
00363
00364
00365
00369
00370
00371
00375 void XmlParser::reportError(void)
00376 {
00377 PARSERERR("Line " << XML_GetCurrentLineNumber(innerParser) << \
00378 ", column " << XML_GetCurrentColumnNumber(innerParser) << \
00379 ": " << XML_ErrorString(XML_GetErrorCode(innerParser)));
00380
00381 throw ExaltParseErrorException();
00382 }
00383
00384
00385
00395 void XmlParser::startElement(void *userData, const XmlChar *name, const XmlChar **attr)
00396 {
00397 ParseInfo *parseInfo = (ParseInfo *)userData;
00398
00399 parseInfo->depth++;
00400
00401 #ifdef PRINT_SAX
00402 XmlChar *ee;
00403 NEW(ee, XmlChar[xmlchar_strlen(name)+10]);
00404
00405 xmlchar_strcpy(ee, "<");
00406 xmlchar_strcat(ee, name);
00407 xmlchar_strcat(ee, ">");
00408
00409 OUTPUT(ee);
00410
00411 DELETE_ARRAY(ee);
00412
00413 if (attr)
00414 if (attr[0])
00415 {
00416 OUTPUT(": ");
00417 for (int i = 0; attr[i]; i += 2)
00418 OUTPUT(attr[i] << " = " << attr[i + 1] << " ");
00419 }
00420 #endif
00421
00422
00423
00424 XmlStartElementEvent *ev;
00425 NEW(ev, XmlStartElementEvent(name, attr));
00426
00427 exposeEvent(ev);
00428 }
00429
00430
00431
00440 void XmlParser::endElement(void *userData,
00441 const XmlChar *name)
00442 {
00443 ParseInfo *parseInfo = (ParseInfo *)userData;
00444
00445
00446 parseInfo->depth--;
00447
00448 #ifdef PRINT_SAX
00449 XmlChar *ee;
00450 NEW(ee, XmlChar[xmlchar_strlen(name)+10]);
00451
00452 xmlchar_strcpy(ee, "</");
00453 xmlchar_strcat(ee, name);
00454 xmlchar_strcat(ee, ">");
00455 OUTPUT(ee);
00456
00457 DELETE_ARRAY(ee);
00458 #endif
00459
00460 XmlEndElementEvent *ev;
00461 NEW(ev, XmlEndElementEvent(name));
00462
00463 exposeEvent(ev);
00464 }
00465
00466
00467
00479 void XmlParser::characterData(void *userData,
00480 const XmlChar *data,
00481 int length)
00482 {
00483 #ifdef PRINT_SAX
00484 for (int i = 0; i < length; i++)
00485 OUTPUT(data[i]);
00486 #endif
00487
00488 XmlCharactersEvent *ev;
00489 NEW(ev, XmlCharactersEvent(data, length));
00490
00491 exposeEvent(ev);
00492
00493 charactersTotal += length;
00494 }
00495
00496
00497
00507 void XmlParser::processingInstruction(void *userData,
00508 const XmlChar *target,
00509 const XmlChar *data)
00510 {
00511 #ifdef PRINT_SAX
00512 OUTPUT(target << " = " << data);
00513 #endif
00514
00515 XmlPIEvent *ev;
00516 NEW(ev, XmlPIEvent(target, data));
00517
00518 exposeEvent(ev);
00519 }
00520
00521
00522
00531 void XmlParser::comment(void *userData,
00532 const XmlChar *data)
00533 {
00534 #ifdef PRINT_SAX
00535 OUTPUT(data);
00536 #endif
00537
00538 XmlCommentEvent *ev;
00539 NEW(ev, XmlCommentEvent(data));
00540
00541 exposeEvent(ev);
00542 }
00543
00544
00545
00553 void XmlParser::startCDATASection(void *userData)
00554 {
00555 #ifdef PRINT_SAX
00556 OUTPUT("start CDATA");
00557 #endif
00558
00559 XmlStartCDATAEvent *ev;
00560 NEW(ev, XmlStartCDATAEvent);
00561
00562 exposeEvent(ev);
00563 }
00564
00565
00566
00574 void XmlParser::endCDATASection(void *userData)
00575 {
00576 #ifdef PRINT_SAX
00577 OUTPUT("end CDATA");
00578 #endif
00579
00580 XmlEndCDATAEvent *ev;
00581 NEW(ev, XmlEndCDATAEvent);
00582
00583 exposeEvent(ev);
00584 }
00585
00586
00587
00597 void XmlParser::defaultHandler(void *userData,
00598 const XmlChar *data,
00599 int length)
00600 {
00601
00602 #ifdef PRINT_SAX
00603 for (int i = 0; i < length; i++)
00604 OUTPUT(data[i]);
00605 OUTPUT("DEFAULT");
00606 #endif
00607
00608 XmlDefaultEvent *ev;
00609 NEW(ev, XmlDefaultEvent(data, length));
00610
00611 exposeEvent(ev);
00612 }
00613
00614
00615
00626 int XmlParser::unknownEncoding(void *unknownEncodingData,
00627 const XmlChar *name,
00628 XML_Encoding *info)
00629 {
00630 #ifdef PRINT_SAX
00631 OUTPUT("Unknown encoding: " << name);
00632 #endif
00633
00634 if (!textCodec)
00635 {
00636
00637 createDefaultTextCodec();
00638 }
00639
00640 Encodings::MIB mib;
00641
00642 try
00643 {
00644 mib = textCodec->getMIB(name);
00645 }
00646 catch (ExaltUnknownEncodingException)
00647 {
00648
00649 return 0;
00650 }
00651
00652 if (mib == Encodings::Unknown)
00653 {
00654
00655
00656
00657 return 0;
00658 }
00659
00660
00661 if (!textCodec->isAbleToConvert(mib))
00662 {
00663
00664 return 0;
00665 }
00666 else
00667 {
00668
00669 textCodec->fillInMapArray(info, mib);
00670 info->convert = convert_wrapper;
00671 info->release = release_wrapper;
00672
00673 EncodingInfo *ei;
00674 NEW(ei, EncodingInfo);
00675
00676 ei->textCodec = textCodec;
00677 ei->mib = mib;
00678
00679 info->data = (void *)ei;
00680
00681 return 1;
00682 }
00683 }
00684
00685
00686
00697 void XmlParser::xmlDecl(void *userData,
00698 const XmlChar *version,
00699 const XmlChar *encoding,
00700 int standalone)
00701 {
00702 #ifdef PRINT_SAX
00703 OUTPUT("XML declaration: version = " << version << ", encoding = " << encoding << ", standalone = " << standalone);
00704 #endif
00705
00706 XmlDeclEvent *ev;
00707 NEW(ev, XmlDeclEvent(version, encoding, standalone));
00708
00709 exposeEvent(ev);
00710
00711 }
00712
00713
00714
00726 void XmlParser::startDoctypeDecl(void *userData,
00727 const XmlChar *doctypeName,
00728 const XmlChar *systemId,
00729 const XmlChar *publicId,
00730 int hasInternalSubset)
00731 {
00732 #ifdef PRINT_SAX
00733 OUTPUT("Start Doctype: doctypeName = " << doctypeName << ", systemId = " << systemId << ", publicId = " << publicId << ", has internal subset = " << hasInternalSubset);
00734 #endif
00735
00736 XmlStartDoctypeEvent *ev;
00737 NEW(ev, XmlStartDoctypeEvent(doctypeName, systemId, publicId, hasInternalSubset));
00738
00739 exposeEvent(ev);
00740 }
00741
00742
00743
00751 void XmlParser::endDoctypeDecl(void *userData)
00752 {
00753 #ifdef PRINT_SAX
00754 OUTPUTNL("End doctype");
00755 #endif
00756
00757 XmlEndDoctypeEvent *ev;
00758 NEW(ev, XmlEndDoctypeEvent());
00759
00760 exposeEvent(ev);
00761 }
00762
00763
00764
00780 void XmlParser::entityDecl(void *userData,
00781 const XmlChar *entityName,
00782 int isParameterEntity,
00783 const XmlChar *value,
00784 int valueLength,
00785 const XmlChar *base,
00786 const XmlChar *systemId,
00787 const XmlChar *publicId,
00788 const XmlChar *notationName)
00789 {
00790 #ifdef PRINT_SAX
00791 OUTPUT("Entity declaration: name = " << entityName << ", isParameterEntity " << isParameterEntity << ", valueLength = " << valueLength << ", base = " << base << ", systemId = " << systemId << ", publicId = " << publicId << ", notationName = " << notationName);
00792 #endif
00793
00794 XmlEntityDeclEvent *ev;
00795 NEW(ev, XmlEntityDeclEvent(entityName, isParameterEntity, value, valueLength, systemId, publicId, notationName));
00796
00797 exposeEvent(ev);
00798 }
00799
00800
00812 void XmlParser::notationDecl(void *userData,
00813 const XmlChar *notationName,
00814 const XmlChar *base,
00815 const XmlChar *systemId,
00816 const XmlChar *publicId)
00817 {
00818 #ifdef PRINT_SAX
00819 OUTPUT("Notation declaration: name = " << notationName << ", base = " << base << ", systemId = " << systemId << ", publicId = " << publicId);
00820 #endif
00821
00822 XmlNotationDeclEvent *ev;
00823 NEW(ev, XmlNotationDeclEvent(notationName, systemId, publicId));
00824
00825 exposeEvent(ev);
00826 }
00827
00828
00829
00830
00836 void XmlParser::exposeEvent(XmlModelEvent *ev)
00837 {
00838 if (xmlModel)
00839 xmlModel->manageEvent(ev);
00840 else
00841 WRN("No XML model has been specified for the parser!");
00842 }
00843
00844
00845