00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00027 #ifdef __GNUG__
00028 # pragma implementation
00029 #endif
00030
#include "xmlcodec.h"

#include <vector>
00032
00033
/*!
  Prints a short summary of the compression run.

  The macro expands in the scope of the encoding methods and relies on two
  names being visible there: \a outputDevice (the device the compressed data
  were written to) and \a parseResult (streamed as the input size and used as
  the divisor, so callers must only expand it when \a parseResult is non-zero).

  The ratio and the rate are derived from the same byte count that is printed
  as the output size, i.e. from outputDevice->bytesWritten(), so the three
  reported figures always agree with each other.
 */
#define DISPLAY_COMPRESSION_SUMMARY \
  do \
    { \
      const double outSize = static_cast<double>(outputDevice->bytesWritten()); \
      OUTPUTENL("Compression performance"); \
      OUTPUTENL(" Input data size: \t\t\t" << parseResult << " B"); \
      OUTPUTENL(" Output data size: \t\t\t" << outputDevice->bytesWritten() << " B"); \
      OUTPUTENL(" Compression ratio: \t\t\t1:" << outSize / parseResult \
                << " (" << (outSize * 100) / parseResult << "%)"); \
      OUTPUTENL(" Compression rate: \t\t\t" << (outSize / parseResult) * 8 << " bpc"); \
    } \
  while (0)
00045
00046
/*!
  Prints a short summary of the decompression run.

  The macro expands in the scope of the decoding method and relies on two
  names being visible there: \a inDevice (the device the compressed data were
  read from) and \a funnelDevice (the device the restored data were written to).
 */
#define DISPLAY_DECOMPRESSION_SUMMARY \
  do \
    { \
      OUTPUTENL("Decompression performance"); \
      OUTPUTENL(" Input data size: \t\t\t" << inDevice->bytesRead() << " B"); \
      OUTPUTENL(" Restored data size: \t\t\t" << funnelDevice->bytesWritten() << " B (passed to the model for final processing)"); \
    } \
  while (0)
00055
00056
00057
00061 XmlCodec::XmlCodec(void)
00062 : XmlCodecBase()
00063 {
00064 }
00065
00066
00067
00071 XmlCodec::~XmlCodec(void)
00072 {
00073 deleteDefaultTextCodec();
00074 }
00075
00076
00082 void XmlCodec::initializePushCoder(IODevice *outDevice)
00083 {
00084 if (coderType == UnknownCoder)
00085
00086 coderType = PushCoder;
00087 else
00088 {
00089 if (coderType == PullCoder)
00090
00091 throw ExaltCoderIsPullException();
00092 else
00093
00094 throw ExaltCoderIsPushException();
00095 }
00096
00097 inputDevice = 0;
00098 outputDevice = outDevice;
00099
00100
00101 NEW(kyGrammar, KYGrammar);
00102
00103
00104 NEW(context, Context);
00105 kyGrammar->setContext(context);
00106
00107 kyGrammar->setOutputDevice(outputDevice);
00108 kyGrammar->setTextCodec(textCodec);
00109
00110
00111 NEW(arithCodec, ArithCodec);
00112
00113
00114 context->setArithCodec(arithCodec);
00115
00116
00117 NEW(xmlParser, XmlParser);
00118
00119
00120
00121 if (ExaltOptions::getOption(ExaltOptions::Model) == ExaltOptions::SimpleModel)
00122 {
00123 NEW(xmlModel, XmlSimpleModel);
00124 outputDevice->putChar(0);
00125 }
00126 else
00127 {
00128 NEW(xmlModel, XmlAdaptiveModel);
00129 outputDevice->putChar(1);
00130 }
00131
00132
00133
00134 xmlParser->setXmlModel(xmlModel);
00135
00136
00137 xmlModel->setGrammar(kyGrammar);
00138
00139
00140
00141
00142 outputDevice->writeData(FILE_STAMP, strlen(FILE_STAMP));
00143
00144
00145 outputDevice->putChar(F_BITS);
00146 outputDevice->putChar(B_BITS);
00147
00148
00149 arithCodec->setOutputDevice(outputDevice);
00150 arithCodec->startOutputtingBits();
00151 arithCodec->startEncode();
00152 }
00153
00154
00155
00163 bool XmlCodec::encodePush(const char *data, int length, bool isFinal = false)
00164 {
00165 if (coderType == PullCoder)
00166
00167 throw ExaltCoderIsPullException();
00168 else
00169 if (coderType == UnknownCoder)
00170
00171 throw ExaltPushCoderNotInitializedException();
00172
00173
00174 bool parseResult = xmlParser->parsePush(data, length, isFinal);
00175
00176 if (isFinal)
00177 {
00178
00179 DELETE(xmlParser);
00180 DELETE(xmlModel);
00181
00182 DELETE(kyGrammar);
00183
00184
00185 if (!parseResult)
00186 {
00187
00189
00190 else
00191 {
00192
00193 context->encodeEndOfMessage();
00194
00195
00196 arithCodec->finishEncode();
00197 arithCodec->doneOutputtingBits();
00198 }
00199
00200
00201 if (parseResult && ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00202 {
00203 DISPLAY_COMPRESSION_SUMMARY;
00204 }
00205
00206 DELETE(arithCodec);
00207 DELETE(context);
00208
00209 DELETE(xmlModel);
00210 }
00211
00212 return parseResult;
00213 }
00214
00215
00216
00223 bool XmlCodec::encode(IODevice *inDevice, IODevice *outDevice)
00224 {
00225 inputDevice = inDevice;
00226 outputDevice = outDevice;
00227
00228
00229 NEW(kyGrammar, KYGrammar);
00230
00231
00232 NEW(context, Context);
00233
00234 kyGrammar->setOutputDevice(outputDevice);
00235
00236 kyGrammar->setTextCodec(textCodec);
00237
00238 kyGrammar->setContext(context);
00239
00240
00241 NEW(arithCodec, ArithCodec);
00242
00243
00244 context->setArithCodec(arithCodec);
00245
00246
00247 NEW(xmlParser, XmlParser);
00248 xmlParser->setInputDevice(inputDevice);
00249
00250
00251
00252 outputDevice->writeData(FILE_STAMP, strlen(FILE_STAMP));
00253
00254
00255 if (ExaltOptions::getOption(ExaltOptions::Model) == ExaltOptions::SimpleModel)
00256 {
00257 NEW(xmlModel, XmlSimpleModel);
00258 outputDevice->putChar(0);
00259 }
00260 else
00261 {
00262 NEW(xmlModel, XmlAdaptiveModel);
00263 outputDevice->putChar(1);
00264 }
00265
00266
00267 xmlParser->setXmlModel(xmlModel);
00268
00269
00270 xmlModel->setGrammar(kyGrammar);
00271
00272
00273
00274
00275
00276 outputDevice->putChar(F_BITS);
00277 outputDevice->putChar(B_BITS);
00278
00279
00280 arithCodec->setOutputDevice(outputDevice);
00281 arithCodec->startOutputtingBits();
00282 arithCodec->startEncode();
00283
00284 coderType = PullCoder;
00285
00286
00287 long parseResult = xmlParser->parse();
00288
00289
00290 DELETE(kyGrammar);
00291
00292
00293 DELETE(xmlParser);
00294 DELETE(xmlModel);
00295
00296 if (!parseResult)
00297 {
00298
00300
00301 else
00302 {
00303
00304 context->encodeEndOfMessage();
00305
00306
00307 arithCodec->finishEncode();
00308 arithCodec->doneOutputtingBits();
00309 }
00310
00311
00312 if (parseResult && ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00313 {
00314 DISPLAY_COMPRESSION_SUMMARY;
00315 }
00316
00317 DELETE(arithCodec);
00318 DELETE(context);
00319
00320
00321
00322 return parseResult;
00323 }
00324
00325
00326
00327
00335 bool XmlCodec::decode(IODevice *inDevice, SAXReceptor *receptor, void *userData = 0)
00336 {
00337 FunnelDevice *funnelDevice;
00338 char *fileStamp;
00339 size_t fileStampLength = strlen(FILE_STAMP);
00340 int symbol;
00341 int byte1, byte2;
00342 long lastFixedContextSymbol;
00343 RuleElement *rel;
00344 unsigned char tmpChar, mask;
00345 unsigned long fibCode = 0;
00346 bool lastWasOne = false;
00347 bool finished = false;
00348 char bits = 0;
00349 unsigned long alphabetBaseSize;
00350 SAXEmitter *saxEmitter;
00351
00352
00353 inputDevice = inDevice;
00354
00355
00356 NEW(kyGrammar, KYGrammar);
00357
00358
00359
00360 NEW(context, Context);
00361
00362 kyGrammar->setContext(context, false);
00363
00364
00365 NEW(arithCodec, ArithCodec);
00366
00367
00368 context->setArithCodec(arithCodec);
00369
00370
00371
00372 NEW(fileStamp, char[fileStampLength + 1]);
00373
00374 inputDevice->readData(fileStamp, fileStampLength);
00375 if (inputDevice->errorOccurred() || inputDevice->eof())
00376 return false;
00377
00378 fileStamp[fileStampLength] = '\0';
00379
00380 if (strcmp(FILE_STAMP, fileStamp))
00381 {
00382 DELETE(fileStamp);
00383 ERR("File format not recognized!");
00384 throw ExaltUnknownFileFormatException();
00385 }
00386 else
00387 {
00388 DELETE(fileStamp);
00389 }
00390
00391
00392 inputDevice->getChar(&byte1);
00393 if (!byte1)
00394 {
00395
00396 NEW(xmlModel, XmlSimpleModel);
00397 }
00398 else
00399 {
00400
00401 NEW(xmlModel, XmlAdaptiveModel);
00402 }
00403
00404 NEW(saxEmitter, SAXEmitter(receptor));
00405 xmlModel->setSAXEmitter(saxEmitter, userData);
00406
00407 NEW(funnelDevice, FunnelDevice(xmlModel, 4096, true));
00408 funnelDevice->prepare();
00409 kyGrammar->setOutputDevice(funnelDevice);
00410 kyGrammar->setTextCodec(textCodec);
00411
00412
00413 inputDevice->getChar(&byte1);
00414 if (inputDevice->errorOccurred() || inputDevice->eof())
00415 return false;
00416
00417 inputDevice->getChar(&byte2);
00418 if (inputDevice->errorOccurred() || inputDevice->eof())
00419 return false;
00420
00421 if (byte1 != F_BITS || byte2 != B_BITS)
00422 FATAL("Compressed file F_BITS = " << byte1 << ", B_BITS = " << byte2 << ")! Compressor was compiled with F_BITS = " << F_BITS << ", B_BITS = " << B_BITS << ".");
00423
00424
00425
00426 while (!finished)
00427 {
00428 mask = 1 << (SIZEOF_CHAR*8 - 1);
00429
00430 int c;
00431
00432 inputDevice->getChar(&c);
00433 if (inputDevice->errorOccurred() || inputDevice->eof())
00434 return false;
00435
00436 tmpChar = (unsigned char)c;
00437
00438 while (mask)
00439 {
00440 fibCode >>= 1;
00441 bits++;
00442
00443 if (tmpChar & mask)
00444 {
00445 fibCode = fibCode | ((unsigned long)1) << (SIZEOF_UNSIGNED_LONG*8-1);
00446
00447 if (!lastWasOne)
00448 lastWasOne = true;
00449 else
00450 {
00451
00452
00453 fibCode >>= (SIZEOF_UNSIGNED_LONG*8 - bits);
00454 alphabetBaseSize = Fibonacci::decode(fibCode);
00455 finished = true;
00456 break;
00457 }
00458 }
00459 else
00460 lastWasOne = false;
00461
00462 mask >>= 1;
00463 }
00464 }
00465
00466
00467 fibCode = 0;
00468 bits = 0;
00469 finished = false;
00470 lastWasOne = false;
00471
00472
00473
00474
00475 context->setType(alphabetBaseSize, DynamicContext);
00476
00477
00478 lastFixedContextSymbol = context->initialize();
00479
00480
00481 arithCodec->setInputDevice(inputDevice);
00482 arithCodec->startInputtingBits();
00483 arithCodec->startDecode();
00484
00485 for (;;)
00486 {
00487
00488 symbol = context->decode();
00489
00490
00491
00492 if (symbol == context->endOfMessage)
00493 break;
00494
00495
00496
00497
00498 if (symbol == Context::NotKnown)
00499 {
00500 FATAL("Unknown symbol decoded!!!");
00501 }
00502
00503 if (symbol > lastFixedContextSymbol)
00504 {
00505
00506
00507
00508 Rule *rule;
00509
00510 NEW(rel, RuleElement);
00511 rel->type = Variable;
00512
00513
00514 rule = kyGrammar->findRule(symbol - lastFixedContextSymbol);
00515
00516 CHECK_POINTER(rule);
00517
00518 rel->rule = rule;
00519
00520
00521 rel->rule->refCount++;
00522 }
00523 else
00524 {
00525
00526
00527 NEW(rel, RuleElement);
00528
00529 rel->type = Terminal;
00530 rel->value = symbol;
00531 }
00532
00533 kyGrammar->appendToRootRule(rel);
00534 }
00535
00536
00537
00538 DELETE(kyGrammar);
00539
00540
00541
00542 arithCodec->finishDecode();
00543 arithCodec->doneInputtingBits();
00544
00545 if (ExaltOptions::getOption(ExaltOptions::Verbose) == ExaltOptions::Yes)
00546 {
00547 DISPLAY_DECOMPRESSION_SUMMARY;
00548 }
00549
00550 DELETE(arithCodec);
00551 DELETE(context);
00552 DELETE(xmlModel);
00553 DELETE(funnelDevice);
00554
00555
00556 DELETE(saxEmitter);
00557
00558 return true;
00559 }
00560
00561