Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

xmlcodec.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002     xmlcodec.cpp  -  Implementation of XmlCodec class
00003                              -------------------
00004     begin                : June 21 2002
00005     copyright            : (C) 2003 by Vojtěch Toman
00006     email                : vtoman@lit.cz
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  *                                                                         *
00011  *   This program is free software; you can redistribute it and/or modify  *
00012  *   it under the terms of the GNU General Public License as published by  *
00013  *   the Free Software Foundation; either version 2 of the License, or     *
00014  *   (at your option) any later version.                                   *
00015  *                                                                         *
00016  ***************************************************************************/
00017 
00018 
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030 
00031 #include "xmlcodec.h"
00032 
00033 
//Prints the compression statistics (used when verbose output is enabled).
//
//Must be expanded in a scope that provides:
//  parseResult  - value reported as the input size (the result of the parse call)
//  outputDevice - device the compressed stream was written to
//
//All derived figures (ratio, rate) are computed from outputDevice->bytesWritten(),
//i.e. from the very same number that is printed as "Output data size", so the
//summary is self-consistent and accounts for every header byte that was written.
//
//The body is wrapped in do { } while (0) so that the macro behaves like a single
//statement (safe in "if (...) MACRO; else ..." constructs).
#define DISPLAY_COMPRESSION_SUMMARY                                                     \
do                                                                                      \
{                                                                                       \
  const double outSize = outputDevice->bytesWritten();                                  \
  OUTPUTENL("Compression performance");                                                 \
  OUTPUTENL("  Input data size: \t\t\t" << parseResult << " B");                        \
  OUTPUTENL("  Output data size: \t\t\t" << outputDevice->bytesWritten() << " B");      \
  OUTPUTENL("  Compression ratio: \t\t\t1:" << outSize/parseResult                      \
            << " (" << (outSize*100)/parseResult << "%)");                              \
  OUTPUTENL("  Compression rate: \t\t\t" << (outSize / parseResult) * 8 << " bpc");     \
} while (0)
00045 
00046 
//Prints the decompression statistics (used when verbose output is enabled).
//
//Must be expanded in a scope that provides:
//  inDevice     - device the compressed data was read from
//  funnelDevice - device the restored data was written to
//
//The body is wrapped in do { } while (0) so that the macro behaves like a single
//statement (safe in "if (...) MACRO; else ..." constructs).
#define DISPLAY_DECOMPRESSION_SUMMARY                                           \
do                                                                              \
{                                                                               \
  OUTPUTENL("Decompression performance");                                       \
  OUTPUTENL("  Input data size: \t\t\t" << inDevice->bytesRead() << " B");      \
  OUTPUTENL("  Restored data size: \t\t\t" << funnelDevice->bytesWritten()      \
            << " B (passed to the model for final processing)");                \
} while (0)
00055 
00056 
00057 
00061 XmlCodec::XmlCodec(void)
00062   : XmlCodecBase()
00063 {
00064 }
00065 
00066 
00067 
00071 XmlCodec::~XmlCodec(void)
00072 {
00073   deleteDefaultTextCodec();
00074 }
00075 
00076 
00082 void XmlCodec::initializePushCoder(IODevice *outDevice)
00083 {
00084   if (coderType == UnknownCoder)
00085     //coder is now PUSH
00086     coderType = PushCoder;
00087   else
00088     {
00089       if (coderType == PullCoder)
00090         //PULL coder used in PUSH mode
00091         throw ExaltCoderIsPullException();
00092       else
00093         //An attempt to re-initialize a PUSH coder
00094         throw ExaltCoderIsPushException();
00095     }
00096 
00097   inputDevice = 0;
00098   outputDevice = outDevice;
00099 
00100   //Create a KY grammar
00101   NEW(kyGrammar, KYGrammar);
00102 
00103   //Create a coding context for the grammar
00104   NEW(context, Context);
00105   kyGrammar->setContext(context);
00106 
00107   kyGrammar->setOutputDevice(outputDevice);
00108   kyGrammar->setTextCodec(textCodec);
00109 
00110   //Create arithmetic coder/decoder...
00111   NEW(arithCodec, ArithCodec);
00112 
00113   //...and bind it with the contexts
00114   context->setArithCodec(arithCodec);
00115 
00116   //Create a XML parser with specified input device
00117   NEW(xmlParser, XmlParser);
00118   //xmlParser->setInputDevice(inputDevice);
00119 
00120   //Create a XML model...
00121   if (ExaltOptions::getOption(ExaltOptions::Model) == ExaltOptions::SimpleModel)
00122     {
00123       NEW(xmlModel, XmlSimpleModel);
00124       outputDevice->putChar(0);         //to indicate the use of the simple model
00125     }
00126   else
00127     {
00128       NEW(xmlModel, XmlAdaptiveModel);
00129       outputDevice->putChar(1);         //to indicate the use of the adaptive model
00130     }
00131 
00132 
00133   //...and bind it with the parser...
00134   xmlParser->setXmlModel(xmlModel);
00135 
00136   //...plus the grammar
00137   xmlModel->setGrammar(kyGrammar);
00138 
00139 
00140 
00141   //write the version string (excluding terminating \0)
00142   outputDevice->writeData(FILE_STAMP, strlen(FILE_STAMP));
00143   
00144   //store F_BITS and B_BITS being used in output
00145   outputDevice->putChar(F_BITS);
00146   outputDevice->putChar(B_BITS);
00147   
00148   //prepare arithCodec for encoding
00149   arithCodec->setOutputDevice(outputDevice);
00150   arithCodec->startOutputtingBits();
00151   arithCodec->startEncode();
00152 }
00153 
00154 
00155 
00163 bool XmlCodec::encodePush(const char *data, int length, bool isFinal = false)
00164 {
00165   if (coderType == PullCoder)
00166     //PULL coder used in PUSH mode
00167     throw ExaltCoderIsPullException();
00168   else
00169     if (coderType == UnknownCoder)
00170       //Unitialized coder used in PUSH mode
00171       throw ExaltPushCoderNotInitializedException();
00172 
00173 
00174   bool parseResult = xmlParser->parsePush(data, length, isFinal);
00175 
00176   if (isFinal)
00177     {
00178       //Delete all objects
00179       DELETE(xmlParser);
00180       DELETE(xmlModel);
00181 
00182       DELETE(kyGrammar);
00183 
00184 
00185       if (!parseResult)
00186         {
00187           //something was wrong --> cleanup needed
00189         }
00190       else
00191         {
00192           //encode "end of message"
00193           context->encodeEndOfMessage();
00194 
00195           //stop arithCodec
00196           arithCodec->finishEncode();
00197           arithCodec->doneOutputtingBits();
00198         }
00199 
00200 
00201       if (parseResult && ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00202         {
00203           DISPLAY_COMPRESSION_SUMMARY;
00204         }
00205 
00206       DELETE(arithCodec);
00207       DELETE(context);
00208 
00209       DELETE(xmlModel);
00210     }
00211 
00212   return parseResult;
00213 }
00214 
00215 
00216 
00223 bool XmlCodec::encode(IODevice *inDevice, IODevice *outDevice)
00224 {
00225   inputDevice = inDevice;
00226   outputDevice = outDevice;
00227 
00228   //Create a KY grammar
00229   NEW(kyGrammar, KYGrammar);
00230 
00231   //Create a coding context for the grammar
00232   NEW(context, Context);
00233 
00234   kyGrammar->setOutputDevice(outputDevice);
00235 
00236   kyGrammar->setTextCodec(textCodec);
00237 
00238   kyGrammar->setContext(context);
00239 
00240   //Create arithmetic coder/decoder...
00241   NEW(arithCodec, ArithCodec);
00242 
00243   //...and bind it with the contexts
00244   context->setArithCodec(arithCodec);
00245 
00246   //Create a XML parser with specified input device
00247   NEW(xmlParser, XmlParser);
00248   xmlParser->setInputDevice(inputDevice);
00249 
00250 
00251   //write the version string (excluding terminating \0)
00252   outputDevice->writeData(FILE_STAMP, strlen(FILE_STAMP));
00253 
00254   //Create a XML model...
00255   if (ExaltOptions::getOption(ExaltOptions::Model) == ExaltOptions::SimpleModel)
00256     {
00257       NEW(xmlModel, XmlSimpleModel);
00258       outputDevice->putChar(0);         //to indicate the use of the simple model
00259     }
00260   else
00261     {
00262       NEW(xmlModel, XmlAdaptiveModel);
00263       outputDevice->putChar(1);         //to indicate the use of the simple model
00264     }
00265 
00266   //...and bind it with the parser...
00267   xmlParser->setXmlModel(xmlModel);
00268 
00269   //...plus the grammar
00270   xmlModel->setGrammar(kyGrammar);
00271 
00272 
00273 
00274   
00275   //store F_BITS and B_BITS being used in output
00276   outputDevice->putChar(F_BITS);
00277   outputDevice->putChar(B_BITS);
00278   
00279   //prepare arithCodec for encoding
00280   arithCodec->setOutputDevice(outputDevice);
00281   arithCodec->startOutputtingBits();
00282   arithCodec->startEncode();
00283 
00284   coderType = PullCoder;
00285 
00286   //parse the data
00287   long parseResult = xmlParser->parse();
00288 
00289  
00290   DELETE(kyGrammar);
00291 
00292   //Delete all objects
00293   DELETE(xmlParser);
00294   DELETE(xmlModel);
00295 
00296   if (!parseResult)
00297     {
00298       //something was wrong --> cleanup needed
00300     }
00301   else
00302     {
00303       //encode "end of message"
00304       context->encodeEndOfMessage();
00305 
00306       //stop arithCodec
00307       arithCodec->finishEncode();
00308       arithCodec->doneOutputtingBits();
00309     }
00310 
00311 
00312   if (parseResult && ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00313     {
00314       DISPLAY_COMPRESSION_SUMMARY;
00315     }
00316 
00317   DELETE(arithCodec);
00318   DELETE(context);
00319 
00320 
00321   //return the result of the parsing
00322   return parseResult;
00323 }
00324 
00325 
00326 
00327 
00335 bool XmlCodec::decode(IODevice *inDevice, SAXReceptor *receptor, void *userData = 0)
00336 {
00337   FunnelDevice *funnelDevice;
00338   char *fileStamp;
00339   size_t fileStampLength = strlen(FILE_STAMP);
00340   int symbol;
00341   int byte1, byte2;
00342   long lastFixedContextSymbol;
00343   RuleElement *rel;
00344   unsigned char tmpChar, mask;
00345   unsigned long fibCode = 0;
00346   bool lastWasOne = false;
00347   bool finished = false;
00348   char bits = 0;
00349   unsigned long alphabetBaseSize;
00350   SAXEmitter *saxEmitter;
00351 
00352 
00353   inputDevice = inDevice;
00354 
00355   //Create a KY grammar
00356   NEW(kyGrammar, KYGrammar);
00357 
00358 
00359   //Create a decoding context for the grammar
00360   NEW(context, Context);
00361 
00362   kyGrammar->setContext(context, false);
00363 
00364   //Create arithmetic coder/decoder...
00365   NEW(arithCodec, ArithCodec);
00366 
00367   //...and bind it with the contexts
00368   context->setArithCodec(arithCodec);
00369 
00370 
00371 
00372   NEW(fileStamp, char[fileStampLength + 1]);
00373 
00374   inputDevice->readData(fileStamp, fileStampLength);
00375   if (inputDevice->errorOccurred() || inputDevice->eof())
00376     return false;
00377 
00378   fileStamp[fileStampLength] = '\0';
00379 
00380   if (strcmp(FILE_STAMP, fileStamp))
00381     {
00382       DELETE(fileStamp);
00383       ERR("File format not recognized!");
00384       throw ExaltUnknownFileFormatException();
00385     }
00386   else
00387     {
00388       DELETE(fileStamp);
00389     }
00390 
00391   //whether to use the simple, or the adaptive model
00392   inputDevice->getChar(&byte1);
00393   if (!byte1)
00394     {
00395       //Create a xmlSimpleModel
00396       NEW(xmlModel, XmlSimpleModel);
00397     }
00398   else
00399     {
00400       //otherwise create the adaptive model
00401       NEW(xmlModel, XmlAdaptiveModel);
00402     }
00403 
00404   NEW(saxEmitter, SAXEmitter(receptor));
00405   xmlModel->setSAXEmitter(saxEmitter, userData);
00406 
00407   NEW(funnelDevice, FunnelDevice(xmlModel, 4096, true));
00408   funnelDevice->prepare();
00409   kyGrammar->setOutputDevice(funnelDevice);
00410   kyGrammar->setTextCodec(textCodec);
00411 
00412     
00413   inputDevice->getChar(&byte1);
00414   if (inputDevice->errorOccurred() || inputDevice->eof())
00415     return false;
00416 
00417   inputDevice->getChar(&byte2);
00418   if (inputDevice->errorOccurred() || inputDevice->eof())
00419     return false;
00420 
00421   if (byte1 != F_BITS || byte2 != B_BITS)
00422     FATAL("Compressed file F_BITS = " << byte1 << ", B_BITS = " << byte2 << ")! Compressor was compiled with F_BITS = " << F_BITS << ", B_BITS = " << B_BITS << ".");
00423 
00424   //construct reverse Fibonacci code (e.g. 1100101 instead of 1010011) from sequence of
00425   //following bytes. This code represents the base size of the terminal alphabet.
00426   while (!finished)
00427     {
00428       mask = 1 << (SIZEOF_CHAR*8 - 1);
00429       
00430       int c;
00431 
00432       inputDevice->getChar(&c);
00433       if (inputDevice->errorOccurred() || inputDevice->eof())
00434         return false;
00435 
00436       tmpChar = (unsigned char)c;
00437       
00438       while (mask)
00439         {
00440           fibCode >>= 1;
00441           bits++;
00442           
00443           if (tmpChar & mask)
00444             {
00445               fibCode = fibCode | ((unsigned long)1) << (SIZEOF_UNSIGNED_LONG*8-1);
00446               
00447               if (!lastWasOne)
00448                 lastWasOne = true;
00449               else
00450                 {
00451                   //Fibonacci code is now complete
00452                   //--> decode the symbol number
00453                   fibCode >>= (SIZEOF_UNSIGNED_LONG*8 - bits);
00454                   alphabetBaseSize = Fibonacci::decode(fibCode);
00455                   finished = true;
00456                   break;
00457                 }
00458             }
00459           else
00460             lastWasOne = false;
00461           
00462           mask >>= 1;
00463         }
00464     }
00465           
00466   //reset Fibonacci state variables
00467   fibCode = 0;
00468   bits = 0;
00469   finished = false;
00470   lastWasOne = false;
00471 
00472 
00473 
00474 
00475   context->setType(alphabetBaseSize, DynamicContext);
00476 
00477   //get the last fixed symbol of the context table (EOM)
00478   lastFixedContextSymbol = context->initialize();
00479 
00480   //prepare arithCodec for decoding
00481   arithCodec->setInputDevice(inputDevice);
00482   arithCodec->startInputtingBits();
00483   arithCodec->startDecode();
00484 
00485   for (;;)
00486     {
00487       //decode next symbol
00488       symbol = context->decode();
00489 
00490 
00491       //in case of the EOM, break
00492       if (symbol == context->endOfMessage)
00493         break;
00494       
00495       //New variable --> decode it from Fibonacci code
00496       //that follows...
00497 
00498       if (symbol == Context::NotKnown)
00499         {
00500           FATAL("Unknown symbol decoded!!!");
00501         }
00502 
00503       if (symbol > lastFixedContextSymbol)
00504         {
00505           //decoded a variable
00506           //--> append it to the grammar
00507 
00508           Rule *rule;
00509 
00510           NEW(rel, RuleElement);
00511           rel->type = Variable;
00512 
00513           //find rule with corresponding id
00514           rule = kyGrammar->findRule(symbol - lastFixedContextSymbol);
00515 
00516           CHECK_POINTER(rule);
00517 
00518           rel->rule = rule;
00519 
00520           //increase rule ref count
00521           rel->rule->refCount++;
00522         }
00523       else
00524         {
00525           //decoded a terminal
00526           //--> append it to the grammar
00527           NEW(rel, RuleElement);
00528 
00529           rel->type = Terminal;
00530           rel->value = symbol;
00531         }
00532 
00533       kyGrammar->appendToRootRule(rel);
00534     }
00535 
00536 
00537   //Delete all objects
00538   DELETE(kyGrammar);
00539 
00540 
00541   //finish decoding
00542   arithCodec->finishDecode();
00543   arithCodec->doneInputtingBits();
00544 
00545   if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00546     {
00547       DISPLAY_DECOMPRESSION_SUMMARY;
00548     }
00549 
00550   DELETE(arithCodec);
00551   DELETE(context);
00552   DELETE(xmlModel);
00553   DELETE(funnelDevice);
00554 
00555 
00556   DELETE(saxEmitter);
00557 
00558   return true;
00559 }
00560 
00561 

Generated on Wed Feb 5 10:43:02 2003 for Exalt by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002