00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00026 #ifdef __GNUG__
00027 # pragma implementation
00028 #endif
00029
00030
00031 #include "textcodec.h"
00032
00033
00037
00038
/*!
  Map entries for the upper half (bytes 0x80..0xFF) of US-ASCII.

  The table is indexed by (byte - 128). Every entry is -1 because US-ASCII
  defines no characters above 0x7F. The table is read-only, hence \c const.

  NOTE(review): -1 is presumably the "invalid byte" marker of the map that
  consumes this table (as in expat's XML_Encoding) -- confirm.
 */
static const int US_ASCII_to_UTF8_table[128] =
{
  /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x88 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x98 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe8 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf8 */ -1, -1, -1, -1, -1, -1, -1, -1
};
00062
00063
00064
/*!
  Map entries for the upper half (bytes 0x80..0xFF) of ISO-8859-2.

  The table is indexed by (byte - 128). Each entry holds the two-byte UTF-8
  sequence of the corresponding character packed into one integer, lead byte
  in the high octet (for example 0xc484 = bytes C4 84 = U+0104, the character
  at ISO-8859-2 position 0xA1). The table is read-only, hence \c const.

  NOTE(review): the values are packed UTF-8 bytes, not Unicode scalar values.
  If the consuming map is expat's XML_Encoding::map (which expects scalar
  values), confirm this is what the rest of the project relies on.
 */
static const int ISO_8859_2_to_UTF8_table[128] =
{
  /* 0x80 */ 0xc280, 0xc281, 0xc282, 0xc283, 0xc284, 0xc285, 0xc286, 0xc287,
  /* 0x88 */ 0xc288, 0xc289, 0xc28a, 0xc28b, 0xc28c, 0xc28d, 0xc28e, 0xc28f,
  /* 0x90 */ 0xc290, 0xc291, 0xc292, 0xc293, 0xc294, 0xc295, 0xc296, 0xc297,
  /* 0x98 */ 0xc298, 0xc299, 0xc29a, 0xc29b, 0xc29c, 0xc29d, 0xc29e, 0xc29f,
  /* 0xa0 */ 0xc2a0, 0xc484, 0xcb98, 0xc581, 0xc2a4, 0xc4bd, 0xc59a, 0xc2a7,
  /* 0xa8 */ 0xc2a8, 0xc5a0, 0xc59e, 0xc5a4, 0xc5b9, 0xc2ad, 0xc5bd, 0xc5bb,
  /* 0xb0 */ 0xc2b0, 0xc485, 0xcb9b, 0xc582, 0xc2b4, 0xc4be, 0xc59b, 0xcb87,
  /* 0xb8 */ 0xc2b8, 0xc5a1, 0xc59f, 0xc5a5, 0xc5ba, 0xcb9d, 0xc5be, 0xc5bc,
  /* 0xc0 */ 0xc594, 0xc381, 0xc382, 0xc482, 0xc384, 0xc4b9, 0xc486, 0xc387,
  /* 0xc8 */ 0xc48c, 0xc389, 0xc498, 0xc38b, 0xc49a, 0xc38d, 0xc38e, 0xc48e,
  /* 0xd0 */ 0xc490, 0xc583, 0xc587, 0xc393, 0xc394, 0xc590, 0xc396, 0xc397,
  /* 0xd8 */ 0xc598, 0xc5ae, 0xc39a, 0xc5b0, 0xc39c, 0xc39d, 0xc5a2, 0xc39f,
  /* 0xe0 */ 0xc595, 0xc3a1, 0xc3a2, 0xc483, 0xc3a4, 0xc4ba, 0xc487, 0xc3a7,
  /* 0xe8 */ 0xc48d, 0xc3a9, 0xc499, 0xc3ab, 0xc49b, 0xc3ad, 0xc3ae, 0xc48f,
  /* 0xf0 */ 0xc491, 0xc584, 0xc588, 0xc3b3, 0xc3b4, 0xc591, 0xc3b6, 0xc3b7,
  /* 0xf8 */ 0xc599, 0xc5af, 0xc3ba, 0xc5b1, 0xc3bc, 0xc3bd, 0xc5a3, 0xcb99
};
00087
00088
00089
/*!
  Fill the 256-entry \c map member of \a _info_ for a single-byte encoding.

  Entries 0..127 are set to their own index; entries 128..255 are copied from
  the table named <tt>_enc_ ## _to_UTF8_table</tt>, indexed by (byte - 128).

  \param _enc_  Encoding prefix; a table <tt>&lt;_enc_&gt;_to_UTF8_table</tt>
                with at least 128 entries must be in scope.
  \param _info_ Pointer expression to an object with an indexable \c map member.

  The body is wrapped in do { } while (0) so that the macro behaves as one
  statement (safe inside if/else) and must be followed by a semicolon.
  \a _info_ is parenthesized so that expressions such as \c &obj work, and the
  loop variable has a macro-specific name to avoid capturing caller names.
 */
#define EXPAT_MAP_SINGLE_BYTE_ENCODING_TO_UTF8(_enc_, _info_)                  \
  do                                                                           \
  {                                                                            \
    for (size_t _mapIdx_ = 0; _mapIdx_ < 128; _mapIdx_++)                      \
      (_info_)->map[_mapIdx_] = static_cast<int>(_mapIdx_);                    \
                                                                               \
    for (size_t _mapIdx_ = 128; _mapIdx_ < 256; _mapIdx_++)                    \
      (_info_)->map[_mapIdx_] = _enc_ ## _to_UTF8_table[_mapIdx_ - 128];       \
  } while (0)
00106
00107
00108
/*!
  Return \a _size_ from the enclosing function when \a _mib_ equals
  \a _mibGiven_; otherwise do nothing.

  \param _mib_      MIB this line stands for.
  \param _mibGiven_ MIB being looked up.
  \param _size_     Value returned on a match.

  Wrapped in do { } while (0) so that the hidden \c if cannot pair with a
  caller's \c else and the macro can be used as a single statement; it must
  be followed by a semicolon. Arguments are parenthesized.
 */
#define ENCODING_SIZE(_mib_, _mibGiven_, _size_)  \
  do                                              \
  {                                               \
    if ((_mib_) == (_mibGiven_))                  \
      return (_size_);                            \
  } while (0)
00119
00120
00121
/*!
  Return \a _mib_ from the enclosing function when xmlchar_cstrcmp() reports
  \a _encGiven_ and \a _enc_ as equal (i.e. returns zero); otherwise do nothing.

  \param _enc_      Encoding name this line stands for.
  \param _encGiven_ Encoding name being looked up.
  \param _mib_      Value returned on a match.

  Wrapped in do { } while (0) so that the hidden \c if cannot pair with a
  caller's \c else; it must be followed by a semicolon. The definition no
  longer ends in a line continuation, so it cannot swallow the following line.
 */
#define ENCODING_MIB(_enc_, _encGiven_, _mib_)        \
  do                                                  \
  {                                                   \
    if (!xmlchar_cstrcmp((_encGiven_), (_enc_)))      \
      return (_mib_);                                 \
  } while (0)
00132
00133
00134
00138
00139
00145 unsigned long TextCodec::suggestAlphabetBaseSize(Encodings::MIB mib) throw (ExaltUnknownEncodingException)
00146 {
00147 ENCODING_SIZE(Encodings::UTF_8, mib, 256);
00148 ENCODING_SIZE(Encodings::UTF_16, mib, 256);
00149 ENCODING_SIZE(Encodings::ISO_8859_1, mib, 256);
00150 ENCODING_SIZE(Encodings::ISO_8859_2, mib, 256);
00151 ENCODING_SIZE(Encodings::US_ASCII, mib, 128);
00152
00153 throw ExaltUnknownEncodingException();
00154 return DEFAULT_ALPHABET_BASE_SIZE;
00155 }
00156
00157
00158
00165 bool TextCodec::isAbleToConvert(Encodings::MIB mib)
00166 {
00167 switch (mib)
00168 {
00169 case Encodings::US_ASCII:
00170
00171
00172 return true;
00173
00174 case Encodings::ISO_8859_2:
00175 return true;
00176
00177 default:
00178 return false;
00179 }
00180 }
00181
00182
00183
00190 void TextCodec::fillInMapArray(XmlEncoding *info, Encodings::MIB mib)
00191 {
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201 switch (mib)
00202 {
00203 case Encodings::US_ASCII:
00204
00205
00206
00207 EXPAT_MAP_SINGLE_BYTE_ENCODING_TO_UTF8(US_ASCII, info);
00208 break;
00209
00210 case Encodings::ISO_8859_2:
00211 EXPAT_MAP_SINGLE_BYTE_ENCODING_TO_UTF8(ISO_8859_2, info);
00212
00213 break;
00214
00215 default:
00216
00217 for (size_t i = 0; i < 256; i++)
00218 info->map[i] = -1;
00219 }
00220 }
00221
00222
00223
00232 int TextCodec::convert(const char *s, Encodings::MIB mib)
00233 {
00234 switch (mib)
00235 {
00236 case Encodings::US_ASCII:
00237
00238
00239
00240 return *s;
00241
00242 default:
00243
00244 return -1;
00245 }
00246 }
00247
00248
00254 void TextCodec::release(Encodings::MIB mib)
00255 {
00256
00257 }
00258
00259
00260
00271 bool TextCodec::knowsMIB(Encodings::MIB mib)
00272 {
00273 size_t i = 0;
00274 Encodings::EncodingName encodingNames[] = { ENCODING_NAMES };
00275
00276 while (encodingNames[i].name)
00277 {
00278 if (encodingNames[i].mib == mib)
00279 return true;
00280
00281 i++;
00282 }
00283
00284 return false;
00285 }
00286
00287
00288
00296 Encodings::MIB TextCodec::getMIB(const XmlChar *encoding) throw (ExaltUnknownEncodingException)
00297 {
00298 if (!encoding)
00299 {
00300 #ifdef XML_UNICODE
00301
00302 return Encodings::UTF_16;
00303 #else
00304
00305 return Encodings::UTF_8;
00306 #endif
00307 }
00308
00309 ENCODING_MIB("US-ASCII", encoding, Encodings::US_ASCII);
00310
00311 ENCODING_MIB("KOI8_V", encoding, Encodings::KOI8_V);
00312 ENCODING_MIB("KOI8_R", encoding, Encodings::KOI8_R);
00313
00314 ENCODING_MIB("UTF-8", encoding, Encodings::UTF_8);
00315 ENCODING_MIB("UTF-16", encoding, Encodings::UTF_16);
00316
00317 ENCODING_MIB("ISO-8859-1", encoding, Encodings::ISO_8859_1);
00318 ENCODING_MIB("ISO-8859-2", encoding, Encodings::ISO_8859_2);
00319 ENCODING_MIB("ISO-8859-3", encoding, Encodings::ISO_8859_3);
00320 ENCODING_MIB("ISO-8859-4", encoding, Encodings::ISO_8859_4);
00321 ENCODING_MIB("ISO-8859-5", encoding, Encodings::ISO_8859_5);
00322 ENCODING_MIB("ISO-8859-6", encoding, Encodings::ISO_8859_6);
00323 ENCODING_MIB("ISO-8859-7", encoding, Encodings::ISO_8859_7);
00324 ENCODING_MIB("ISO-8859-8", encoding, Encodings::ISO_8859_8);
00325 ENCODING_MIB("ISO-8859-9", encoding, Encodings::ISO_8859_9);
00326 ENCODING_MIB("ISO-8859-10", encoding, Encodings::ISO_8859_10);
00327 ENCODING_MIB("ISO-8859-11", encoding, Encodings::ISO_8859_11);
00328 ENCODING_MIB("ISO-8859-13", encoding, Encodings::ISO_8859_13);
00329 ENCODING_MIB("ISO-8859-14", encoding, Encodings::ISO_8859_14);
00330 ENCODING_MIB("ISO-8859-15", encoding, Encodings::ISO_8859_15);
00331
00332 ENCODING_MIB("CP_1250", encoding, Encodings::CP_1250);
00333 ENCODING_MIB("CP_1251", encoding, Encodings::CP_1251);
00334 ENCODING_MIB("CP_1252", encoding, Encodings::CP_1252);
00335 ENCODING_MIB("CP_1253", encoding, Encodings::CP_1253);
00336 ENCODING_MIB("CP_1254", encoding, Encodings::CP_1254);
00337 ENCODING_MIB("CP_1255", encoding, Encodings::CP_1255);
00338 ENCODING_MIB("CP_1256", encoding, Encodings::CP_1256);
00339 ENCODING_MIB("CP_1257", encoding, Encodings::CP_1257);
00340 ENCODING_MIB("CP_1258", encoding, Encodings::CP_1258);
00341
00342
00343 ERR("Unknown character encoding: " << encoding);
00344 throw ExaltUnknownEncodingException();
00345 return Encodings::Unknown;
00346 }
00347
00348
00349
00357 void TextCodec::output(IODevice *device, const XmlChar c, Encodings::MIB toEncoding) throw (ExaltEncodingException, ExaltIOException)
00358 {
00359 switch (toEncoding)
00360 {
00361 case Encodings::UTF_8:
00362 case Encodings::UTF_16:
00363 device->writeData((const char *)&c, SIZEOF_XML_CHAR);
00364 break;
00365
00366 default:
00367
00368 ERR("Unsupported output encoding!");
00369 throw ExaltUnsupportedOutputEncodingException();
00370 }
00371 }
00372
00373
00374
00382 void TextCodec::output(IODevice *device, const XmlChar *str, Encodings::MIB toEncoding) throw (ExaltEncodingException, ExaltIOException)
00383 {
00384 switch (toEncoding)
00385 {
00386 case Encodings::UTF_8:
00387 case Encodings::UTF_16:
00388 for (size_t i = 0; str[i]; i++)
00389 device->writeData((const char *)&str[i], SIZEOF_XML_CHAR);
00390 break;
00391
00392 default:
00393
00394 ERR("Unsupported output encoding!");
00395 throw ExaltUnsupportedOutputEncodingException();
00396 }
00397 }
00398
00399
00400
00409 void TextCodec::output(IODevice *device, const XmlChar *str, size_t length, Encodings::MIB toEncoding) throw (ExaltEncodingException, ExaltIOException)
00410 {
00411 switch (toEncoding)
00412 {
00413 case Encodings::UTF_8:
00414 case Encodings::UTF_16:
00415 for (size_t i = 0; i < length; i++)
00416 device->writeData((const char *)&str[i], SIZEOF_XML_CHAR);
00417 break;
00418
00419 default:
00420
00421 ERR("Unsupported output encoding!");
00422 throw ExaltUnsupportedOutputEncodingException();
00423 }
00424 }
00425