1 /*****************************************************************************\
3 * Name : utf_conversion *
4 * Author : Unicode, Inc. (C conversion functions) *
5 * Author : Chris Koeritz (C++ conversion classes) *
7 *******************************************************************************
8 * Copyright (c) 2006-$now By Author. This program is free software; you can *
9 * redistribute it and/or modify it under the terms of the GNU General Public *
10 * License as published by the Free Software Foundation; either version 2 of *
11 * the License or (at your option) any later version. This is online at: *
12 * http://www.fsf.org/copyleft/gpl.html *
13 * Please send any updates to: fred@gruntose.com *
14 \*****************************************************************************/
16 //copyright below is relevant to UTF conversion methods only.
18 * Copyright 2001-$now Unicode, Inc.
22 * This source code is provided as is by Unicode, Inc. No claims are
23 * made as to fitness for any particular purpose. No warranties of any
24 * kind are expressed or implied. The recipient agrees to determine
25 * applicability of information provided. If this file has been
26 * purchased on magnetic or optical media from Unicode, Inc., the
27 * sole remedy for any claim will be exchange of defective media
28 * within 90 days of receipt.
30 * Limitations on Rights to Redistribute This Code
32 * Unicode, Inc. hereby grants the right to freely use the information
33 * supplied in this file in the creation of products supporting the
34 * Unicode Standard, and to make copies of this file in any form
35 * for internal or external distribution as long as this notice
39 /* ---------------------------------------------------------------------
41 Conversions between UTF32, UTF-16, and UTF-8. Source code file.
42 Author: Mark E. Davis, 1994.
43 Rev History: Rick McGowan, fixes & updates May 2001.
44 Sept 2001: fixed const & error conditions per
45 mods suggested by S. Parent & A. Lillich.
46 June 2002: Tim Dodd added detection and handling of incomplete
47 source sequences, enhanced error detection, added casts
48 to eliminate compiler warnings.
49 July 2003: slight mods to back out aggressive FFFE detection.
50 Jan 2004: updated switches in from-UTF8 conversions.
51 Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
53 See the header file "ConvertUTF.h" for complete documentation.
55 ------------------------------------------------------------------------ */
58 #include "utf_conversion.h"
/* Constants for UTF-16 surrogate arithmetic: a supplementary value is split into
 * a high part (shifted right by halfShift) and a low part (selected by halfMask,
 * the low 10 bits); halfBase is 0x10000. */
68 static const int halfShift = 10; /* used for shifting by 10 bits */
70 static const UTF32 halfBase = 0x0010000UL;
71 static const UTF32 halfMask = 0x3FFUL;
/* Inclusive bounds of the high (0xD800-0xDBFF) and low (0xDC00-0xDFFF)
 * surrogate ranges; the macros cast each bound to UTF32. */
73 #define UNI_SUR_HIGH_START (UTF32)0xD800
74 #define UNI_SUR_HIGH_END (UTF32)0xDBFF
75 #define UNI_SUR_LOW_START (UTF32)0xDC00
76 #define UNI_SUR_LOW_END (UTF32)0xDFFF
78 /* --------------------------------------------------------------------- */
/*
 * ConvertUTF32toUTF16: re-encodes the UTF-32 values in [*sourceStart, sourceEnd)
 * as UTF-16 units stored into [*targetStart, targetEnd).
 *   sourceStart/targetStart - in/out cursors; both are written back with the
 *                             positions reached when the loop ends.
 *   flags                   - when equal to strictConversion, surrogate values
 *                             and values above UNI_MAX_LEGAL_UTF32 set
 *                             sourceIllegal; the other visible path stores
 *                             UNI_REPLACEMENT_CHAR instead.
 * result starts as conversionOK and becomes targetExhausted when the output
 * has no room for the next unit(s).
 * NOTE(review): this listing omits several short lines (the declaration and
 * read of ch, closing braces, break/else lines, the final return) -- confirm
 * against the complete file before relying on exact control flow.
 */
80 ConversionResult ConvertUTF32toUTF16 (
81 const UTF32** sourceStart, const UTF32* sourceEnd,
82 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
83 ConversionResult result = conversionOK;
84 const UTF32* source = *sourceStart;
85 UTF16* target = *targetStart;
86 while (source < sourceEnd) {
/* At least one free output slot is required before anything is consumed. */
88 if (target >= targetEnd) {
89 result = targetExhausted; break;
92 if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
93 /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
94 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
95 if (flags == strictConversion) {
96 --source; /* return to the illegal value itself */
97 result = sourceIllegal;
100 *target++ = UNI_REPLACEMENT_CHAR;
103 *target++ = (UTF16)ch; /* normal case */
105 } else if (ch > UNI_MAX_LEGAL_UTF32) {
106 if (flags == strictConversion) {
107 result = sourceIllegal;
109 *target++ = UNI_REPLACEMENT_CHAR;
112 /* target is a character in range 0xFFFF - 0x10FFFF. */
/* A surrogate pair needs two output slots; if only one is left the source
 * cursor is stepped back so the caller can retry this value later. */
113 if (target + 1 >= targetEnd) {
114 --source; /* Back up source pointer! */
115 result = targetExhausted; break;
/* NOTE(review): the high/low split below presumably follows a "ch -= halfBase"
 * step on a line not visible in this listing -- confirm. */
118 *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
119 *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
/* Publish how far both cursors advanced. */
122 *sourceStart = source;
123 *targetStart = target;
127 /* --------------------------------------------------------------------- */
/*
 * ConvertUTF16toUTF32: decodes the UTF-16 units in [*sourceStart, sourceEnd)
 * into UTF-32 values stored into [*targetStart, targetEnd).
 *   sourceStart/targetStart - in/out cursors, written back with the positions
 *                             reached.
 *   flags                   - when equal to strictConversion, an unpaired high
 *                             surrogate or a lone low surrogate sets
 *                             sourceIllegal with the source cursor stepped
 *                             back onto the offending unit.
 * result starts as conversionOK; it becomes sourceExhausted when a high
 * surrogate is the last unit available, and targetExhausted when the output is
 * full (the source cursor is then restored to the start of the character).
 * NOTE(review): the declarations/reads of ch and ch2, the store into *target,
 * closing braces and the return are not visible in this listing.
 */
129 ConversionResult ConvertUTF16toUTF32 (
130 const UTF16** sourceStart, const UTF16* sourceEnd,
131 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
132 ConversionResult result = conversionOK;
133 const UTF16* source = *sourceStart;
134 UTF32* target = *targetStart;
136 while (source < sourceEnd) {
137 const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
139 /* If we have a surrogate pair, convert to UTF32 first. */
140 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
141 /* If the 16 bits following the high surrogate are in the source buffer... */
142 if (source < sourceEnd) {
144 /* If it's a low surrogate, convert to UTF32. */
145 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
/* Combine the two 10-bit halves and add back halfBase (0x10000). */
146 ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
147 + (ch2 - UNI_SUR_LOW_START) + halfBase;
149 } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
150 --source; /* return to the illegal value itself */
151 result = sourceIllegal;
154 } else { /* We don't have the 16 bits following the high surrogate. */
155 --source; /* return to the high surrogate */
156 result = sourceExhausted;
159 } else if (flags == strictConversion) {
160 /* UTF-16 surrogate values are illegal in UTF-32 */
161 if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
162 --source; /* return to the illegal value itself */
163 result = sourceIllegal;
167 if (target >= targetEnd) {
168 source = oldSource; /* Back up source pointer! */
169 result = targetExhausted; break;
173 *sourceStart = source;
174 *targetStart = target;
/* Diagnostic written to stderr when an illegal sequence stopped the conversion.
 * NOTE(review): no preprocessor guard is visible around it in this listing, and
 * ch2 appears to be assigned only on the high-surrogate path, so it may hold a
 * stale or unset value here -- confirm. */
176 if (result == sourceIllegal) {
177 fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
184 /* --------------------------------------------------------------------- */
187 * Index into the table below with the first byte of a UTF-8 sequence to
188 * get the number of trailing bytes that are supposed to follow it.
189 * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
190 * left as-is for anyone who may want to do such conversion, which was
191 * allowed in earlier algorithms.
/* 256-entry lookup indexed by a byte value: entries 0x00-0xBF hold 0,
 * 0xC0-0xDF hold 1, 0xE0-0xEF hold 2, 0xF0-0xF7 hold 3, 0xF8-0xFB hold 4 and
 * 0xFC-0xFF hold 5. The UTF-8 readers in this file index it with *source to get
 * the number of extra bytes to read. */
193 static const char trailingBytesForUTF8[256] = {
194 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
195 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
197 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
198 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
199 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
201 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
205 * Magic values subtracted from a buffer value during UTF8 conversion.
206 * This table contains as many values as there might be trailing bytes
207 * in a UTF-8 sequence.
/* Six correction values, one per possible count of extra bytes (0..5). The
 * UTF-8 readers in this file subtract offsetsFromUTF8[extraBytesToRead] from
 * the accumulated ch after summing and shifting the raw bytes. */
209 static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
210 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
213 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
214 * into the first byte, depending on how many bytes follow. There are
215 * as many entries in this table as there are UTF-8 sequence types.
216 * (I.e., one byte sequence, two byte... etc.). Remember that sequences
217 * for *legal* UTF-8 will be 4 or fewer bytes total.
/* Lead-byte prefix indexed by the total byte count of the sequence being
 * written (the UTF-8 writers in this file OR firstByteMark[bytesToWrite] into
 * the first byte). Index 0 is unused padding; index 1 adds no marker bits. */
219 static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
221 /* --------------------------------------------------------------------- */
223 /* The interface converts a whole buffer to avoid function-call overhead.
224 * Constants have been gathered. Loops & conditionals have been removed as
225 * much as possible for efficiency, in favor of drop-through switches.
226 * (See "Note A" at the bottom of the file for equivalent code.)
227 * If your compiler supports it, the "isLegalUTF8" call can be turned
228 * into an inline function.
231 /* --------------------------------------------------------------------- */
/*
 * ConvertUTF16toUTF8: encodes the UTF-16 units in [*sourceStart, sourceEnd)
 * as UTF-8 bytes stored into [*targetStart, targetEnd).
 *   sourceStart/targetStart - in/out cursors, written back with the positions
 *                             reached.
 *   flags                   - when equal to strictConversion, an unpaired high
 *                             surrogate or a lone low surrogate sets
 *                             sourceIllegal.
 * result starts as conversionOK; it becomes sourceExhausted when a high
 * surrogate is the last available unit and targetExhausted when the encoded
 * bytes would run past targetEnd (both cursors are restored first).
 * NOTE(review): the declarations/reads of ch and ch2, closing braces, break
 * lines and the return are not visible in this listing.
 */
233 ConversionResult ConvertUTF16toUTF8 (
234 const UTF16** sourceStart, const UTF16* sourceEnd,
235 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
236 ConversionResult result = conversionOK;
237 const UTF16* source = *sourceStart;
238 UTF8* target = *targetStart;
239 while (source < sourceEnd) {
241 unsigned short bytesToWrite = 0;
/* (ch | byteMark) & byteMask sets bit 7 and clears bit 6, giving a byte of
 * the form 10xxxxxx from the low six bits of ch. */
242 const UTF32 byteMask = 0xBF;
243 const UTF32 byteMark = 0x80;
244 const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
246 /* If we have a surrogate pair, convert to UTF32 first. */
247 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
248 /* If the 16 bits following the high surrogate are in the source buffer... */
249 if (source < sourceEnd) {
251 /* If it's a low surrogate, convert to UTF32. */
252 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
253 ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
254 + (ch2 - UNI_SUR_LOW_START) + halfBase;
256 } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
257 --source; /* return to the illegal value itself */
258 result = sourceIllegal;
261 } else { /* We don't have the 16 bits following the high surrogate. */
262 --source; /* return to the high surrogate */
263 result = sourceExhausted;
266 } else if (flags == strictConversion) {
267 /* UTF-16 surrogate values are illegal in UTF-32 */
268 if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
269 --source; /* return to the illegal value itself */
270 result = sourceIllegal;
274 /* Figure out how many bytes the result will require */
275 if (ch < (UTF32)0x80) { bytesToWrite = 1;
276 } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
277 } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
278 } else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
/* Values of 0x110000 and above are replaced by UNI_REPLACEMENT_CHAR, written
 * as three bytes. */
279 } else { bytesToWrite = 3;
280 ch = UNI_REPLACEMENT_CHAR;
/* Advance to the end of the sequence first: the bytes are stored back-to-front. */
283 target += bytesToWrite;
284 if (target > targetEnd) {
285 source = oldSource; /* Back up source pointer! */
286 target -= bytesToWrite; result = targetExhausted; break;
288 switch (bytesToWrite) { /* note: everything falls through. */
289 case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
290 case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
291 case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
292 case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
/* The switch walked target back to the first byte; skip past the sequence. */
294 target += bytesToWrite;
296 *sourceStart = source;
297 *targetStart = target;
301 /* --------------------------------------------------------------------- */
304 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
305 * This must be called with the length pre-determined by the first byte.
306 * If not calling this from ConvertUTF8to*, then the length can be set by:
307 * length = trailingBytesForUTF8[*source]+1;
308 * and the sequence is illegal right away if there aren't that many bytes
310 * If presented with a length > 4, this returns false. The Unicode
311 * definition of UTF-8 goes up to 4-byte sequences.
/*
 * isLegalUTF8: checks one UTF-8 sequence of `length` bytes starting at `source`.
 * The trailing bytes are examined from last to first through srcptr, which
 * starts one past the end of the sequence; the lead byte is checked last.
 * Returns false through the default label for lengths without a case label.
 * NOTE(review): the declaration of `a`, both switch headers (presumably on
 * length and on *source), closing braces and the final "return true" are not
 * visible in this listing -- confirm.
 */
314 static Booleano isLegalUTF8(const UTF8 *source, int length) {
316 const UTF8 *srcptr = source+length;
318 default: return false;
319 /* Everything else falls through when "true"... */
/* Fourth and third bytes must lie in 0x80..0xBF. */
320 case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
321 case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
/* The second byte's upper bound is checked here; its lower bound depends on
 * the selector value handled by the case labels below. */
322 case 2: if ((a = (*--srcptr)) > 0xBF) return false;
325 /* no fall-through in this inner switch */
326 case 0xE0: if (a < 0xA0) return false; break;
327 case 0xED: if (a > 0x9F) return false; break;
328 case 0xF0: if (a < 0x90) return false; break;
329 case 0xF4: if (a > 0x8F) return false; break;
330 default: if (a < 0x80) return false;
/* Lead bytes 0x80..0xC1 are rejected. */
333 case 1: if (*source >= 0x80 && *source < 0xC2) return false;
/* Lead bytes above 0xF4 are rejected. */
335 if (*source > 0xF4) return false;
339 /* --------------------------------------------------------------------- */
342 * Exported function to return whether a UTF-8 sequence is legal or not.
343 * This is not used here; it's just exported.
345 Booleano isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
346 int length = trailingBytesForUTF8[*source]+1;
347 if (source+length > sourceEnd) {
350 return isLegalUTF8(source, length);
353 /* --------------------------------------------------------------------- */
/*
 * ConvertUTF8toUTF16: decodes the UTF-8 bytes in [*sourceStart, sourceEnd)
 * into UTF-16 units stored into [*targetStart, targetEnd).
 *   sourceStart/targetStart - in/out cursors, written back with the positions
 *                             reached.
 *   flags                   - when equal to strictConversion, decoded surrogate
 *                             values and values above UNI_MAX_UTF16 set
 *                             sourceIllegal with the source cursor moved back
 *                             to the start of the sequence; the other visible
 *                             path stores UNI_REPLACEMENT_CHAR.
 * result starts as conversionOK; it becomes sourceExhausted when a sequence is
 * cut off by sourceEnd, sourceIllegal when isLegalUTF8 rejects the sequence,
 * and targetExhausted when the output is full.
 * NOTE(review): the declaration/initialisation of ch, closing braces, break and
 * else lines, a probable "ch -= halfBase" before the surrogate split, and the
 * return are not visible in this listing.
 */
355 ConversionResult ConvertUTF8toUTF16 (
356 const UTF8** sourceStart, const UTF8* sourceEnd,
357 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
358 ConversionResult result = conversionOK;
359 const UTF8* source = *sourceStart;
360 UTF16* target = *targetStart;
361 while (source < sourceEnd) {
/* The lead byte selects how many further bytes belong to this sequence. */
363 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
364 if (source + extraBytesToRead >= sourceEnd) {
365 result = sourceExhausted; break;
367 /* Do this check whether lenient or strict */
368 if (! isLegalUTF8(source, extraBytesToRead+1)) {
369 result = sourceIllegal;
373 * The cases all fall through. See "Note A" below.
375 switch (extraBytesToRead) {
376 case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
377 case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
378 case 3: ch += *source++; ch <<= 6;
379 case 2: ch += *source++; ch <<= 6;
380 case 1: ch += *source++; ch <<= 6;
381 case 0: ch += *source++;
/* Remove the accumulated lead/continuation marker bits in one subtraction. */
383 ch -= offsetsFromUTF8[extraBytesToRead];
385 if (target >= targetEnd) {
386 source -= (extraBytesToRead+1); /* Back up source pointer! */
387 result = targetExhausted; break;
389 if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
390 /* UTF-16 surrogate values are illegal in UTF-32 */
391 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
392 if (flags == strictConversion) {
393 source -= (extraBytesToRead+1); /* return to the illegal value itself */
394 result = sourceIllegal;
397 *target++ = UNI_REPLACEMENT_CHAR;
400 *target++ = (UTF16)ch; /* normal case */
402 } else if (ch > UNI_MAX_UTF16) {
403 if (flags == strictConversion) {
404 result = sourceIllegal;
405 source -= (extraBytesToRead+1); /* return to the start */
406 break; /* Bail out; shouldn't continue */
408 *target++ = UNI_REPLACEMENT_CHAR;
411 /* target is a character in range 0xFFFF - 0x10FFFF. */
/* Two output slots are needed for a surrogate pair. */
412 if (target + 1 >= targetEnd) {
413 source -= (extraBytesToRead+1); /* Back up source pointer! */
414 result = targetExhausted; break;
417 *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
418 *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
421 *sourceStart = source;
422 *targetStart = target;
426 /* --------------------------------------------------------------------- */
/*
 * ConvertUTF32toUTF8: encodes the UTF-32 values in [*sourceStart, sourceEnd)
 * as UTF-8 bytes stored into [*targetStart, targetEnd).
 *   sourceStart/targetStart - in/out cursors, written back with the positions
 *                             reached.
 *   flags                   - when equal to strictConversion, surrogate values
 *                             set sourceIllegal with the source cursor stepped
 *                             back onto the offending value.
 * Values above UNI_MAX_LEGAL_UTF32 are replaced by UNI_REPLACEMENT_CHAR and
 * set result to sourceIllegal regardless of flags. result becomes
 * targetExhausted when the encoded bytes would run past targetEnd.
 * NOTE(review): the declaration/read of ch, closing braces, break lines and
 * the return are not visible in this listing.
 */
428 ConversionResult ConvertUTF32toUTF8 (
429 const UTF32** sourceStart, const UTF32* sourceEnd,
430 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
431 ConversionResult result = conversionOK;
432 const UTF32* source = *sourceStart;
433 UTF8* target = *targetStart;
434 while (source < sourceEnd) {
436 unsigned short bytesToWrite = 0;
/* (ch | byteMark) & byteMask produces a byte of the form 10xxxxxx. */
437 const UTF32 byteMask = 0xBF;
438 const UTF32 byteMark = 0x80;
440 if (flags == strictConversion ) {
441 /* UTF-16 surrogate values are illegal in UTF-32 */
442 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
443 --source; /* return to the illegal value itself */
444 result = sourceIllegal;
449 * Figure out how many bytes the result will require. Turn any
450 * illegally large UTF32 things (> Plane 17) into replacement chars.
452 if (ch < (UTF32)0x80) { bytesToWrite = 1;
453 } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
454 } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
455 } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
456 } else { bytesToWrite = 3;
457 ch = UNI_REPLACEMENT_CHAR;
458 result = sourceIllegal;
/* Advance to the end of the sequence first: the bytes are stored back-to-front. */
461 target += bytesToWrite;
462 if (target > targetEnd) {
463 --source; /* Back up source pointer! */
464 target -= bytesToWrite; result = targetExhausted; break;
466 switch (bytesToWrite) { /* note: everything falls through. */
467 case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
468 case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
469 case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
470 case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
/* The switch walked target back to the first byte; skip past the sequence. */
472 target += bytesToWrite;
474 *sourceStart = source;
475 *targetStart = target;
479 /* --------------------------------------------------------------------- */
/*
 * ConvertUTF8toUTF32: decodes the UTF-8 bytes in [*sourceStart, sourceEnd)
 * into UTF-32 values stored into [*targetStart, targetEnd).
 *   sourceStart/targetStart - in/out cursors, written back with the positions
 *                             reached.
 *   flags                   - when equal to strictConversion, decoded surrogate
 *                             values set sourceIllegal with the source cursor
 *                             moved back to the start of the sequence; the
 *                             other visible path stores UNI_REPLACEMENT_CHAR.
 * Decoded values above UNI_MAX_LEGAL_UTF32 store UNI_REPLACEMENT_CHAR and set
 * result to sourceIllegal regardless of flags. result becomes sourceExhausted
 * when a sequence is cut off by sourceEnd, sourceIllegal when isLegalUTF8
 * rejects it, and targetExhausted when the output is full.
 * NOTE(review): the declaration/initialisation of ch, the normal-case store,
 * closing braces, break/else lines and the return are not visible in this
 * listing.
 */
481 ConversionResult ConvertUTF8toUTF32 (
482 const UTF8** sourceStart, const UTF8* sourceEnd,
483 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
484 ConversionResult result = conversionOK;
485 const UTF8* source = *sourceStart;
486 UTF32* target = *targetStart;
487 while (source < sourceEnd) {
/* The lead byte selects how many further bytes belong to this sequence. */
489 unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
490 if (source + extraBytesToRead >= sourceEnd) {
491 result = sourceExhausted; break;
493 /* Do this check whether lenient or strict */
494 if (! isLegalUTF8(source, extraBytesToRead+1)) {
495 result = sourceIllegal;
499 * The cases all fall through. See "Note A" below.
501 switch (extraBytesToRead) {
502 case 5: ch += *source++; ch <<= 6;
503 case 4: ch += *source++; ch <<= 6;
504 case 3: ch += *source++; ch <<= 6;
505 case 2: ch += *source++; ch <<= 6;
506 case 1: ch += *source++; ch <<= 6;
507 case 0: ch += *source++;
/* Remove the accumulated lead/continuation marker bits in one subtraction. */
509 ch -= offsetsFromUTF8[extraBytesToRead];
511 if (target >= targetEnd) {
512 source -= (extraBytesToRead+1); /* Back up the source pointer! */
513 result = targetExhausted; break;
515 if (ch <= UNI_MAX_LEGAL_UTF32) {
517 * UTF-16 surrogate values are illegal in UTF-32, and anything
518 * over Plane 17 (> 0x10FFFF) is illegal.
520 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
521 if (flags == strictConversion) {
522 source -= (extraBytesToRead+1); /* return to the illegal value itself */
523 result = sourceIllegal;
526 *target++ = UNI_REPLACEMENT_CHAR;
531 } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
532 result = sourceIllegal;
533 *target++ = UNI_REPLACEMENT_CHAR;
536 *sourceStart = source;
537 *targetStart = target;
541 /* ---------------------------------------------------------------------
544 The fall-through switches in UTF-8 reading code save a
545 temp variable, some decrements & conditionals. The switches
546 are equivalent to the following loop:
548 int tmpBytesToRead = extraBytesToRead+1;
552 if (tmpBytesToRead) ch <<= 6;
553 } while (tmpBytesToRead > 0);
555 In UTF-8 writing code, the switches on "bytesToWrite" are
556 similarly unrolled loops.
558 --------------------------------------------------------------------- */
// builds a UTF-16 copy of the zero-terminated UTF-8 string "utf8_input".
// _orig_length counts the input bytes plus the terminator, and the output
// array is allocated with one unit more than that. _result records the
// status returned by the converter, which is run in lenient mode.
// NOTE(review): the handling inside the "_orig_length == 1" branch, the
// braces and the end of the body are not visible in this listing.
564 transcode_to_utf16::transcode_to_utf16(const char *utf8_input)
565 : _orig_length(int(strlen(utf8_input)) + 1),
566 _converted(new UTF16[_orig_length + 1])
567 // we don't ever expect the string to get longer going to the larger data
568 // type, so the current length should be enough.
570 _result = conversionOK;
571 if (_orig_length == 1) {
572 // no length, so only provide a blank string.
// zeroes the first _orig_length units (2 bytes each), so any units the
// converter does not write read as terminators.
576 memset((abyte *)_converted, 0, 2 * _orig_length);
577 // we use these temporary pointers since the converter resets the source
578 // and target pointers to the end of the conversion. the same pattern
579 // is used in the code below.
580 const UTF8 *temp_in = (const UTF8 *)utf8_input;
581 UTF16 *temp_out = _converted;
// the source range includes the input's zero terminator.
582 _result = ConvertUTF8toUTF16(&temp_in, temp_in + _orig_length,
583 &temp_out, temp_out + _orig_length, lenientConversion);
// builds a UTF-16 copy of the UTF-8 text held in the astring "utf8_input".
// _orig_length is the astring's length plus one, and the output array holds
// exactly that many units. _result records the status returned by the
// converter, which is run in lenient mode.
// NOTE(review): the handling inside the "_orig_length == 1" branch, the
// braces and the end of the body are not visible in this listing.
586 transcode_to_utf16::transcode_to_utf16(const astring &utf8_input)
587 : _orig_length(utf8_input.length() + 1),
588 _converted(new UTF16[_orig_length])
590 _result = conversionOK;
591 if (_orig_length == 1) {
592 // no length, so only provide a blank string.
// zeroes the whole output array (2 bytes per unit).
596 memset((abyte *)_converted, 0, 2 * _orig_length);
// temporaries are used because the converter advances the pointers it is given.
597 const UTF8 *temp_in = (const UTF8 *)utf8_input.observe();
598 UTF16 *temp_out = _converted;
599 _result = ConvertUTF8toUTF16(&temp_in, temp_in + _orig_length,
600 &temp_out, temp_out + _orig_length, lenientConversion);
// releases the UTF-16 array that the constructors allocated with new[].
603 transcode_to_utf16::~transcode_to_utf16()
605 delete [] _converted;
609 int transcode_to_utf16::length() const
610 { return int(wcslen((wchar_t *)_converted)); }
614 transcode_to_utf8::transcode_to_utf8(const UTF16 *utf16_input)
615 : _orig_length(int(wcslen((const wchar_t *)utf16_input))),
616 _new_length(_orig_length * 2 + _orig_length / 2 + 1),
617 // this is just an estimate. it may be appropriate most of the time.
618 // whatever doesn't fit will get truncated.
619 _converted(new UTF8[_new_length])
621 _result = conversionOK;
622 if (_orig_length == 0) {
623 // no length, so only provide a blank string.
627 memset(_converted, 0, _new_length);
628 const UTF16 *temp_in = (const UTF16 *)utf16_input;
629 UTF8 *temp_out = _converted;
630 _result = ConvertUTF16toUTF8(&temp_in, temp_in + _orig_length,
631 &temp_out, temp_out + _new_length, lenientConversion);
634 transcode_to_utf8::transcode_to_utf8(const wchar_t *utf16_input)
635 : _orig_length(int(wcslen(utf16_input))),
636 _new_length(_orig_length * 2 + _orig_length / 2 + 1),
637 // this is just an estimate. it may be appropriate most of the time.
638 // whatever doesn't fit will get truncated.
639 _converted(new UTF8[_new_length > 0 ? _new_length : 1])
641 _result = conversionOK;
642 if (_orig_length == 0) {
643 // no length, so only provide a blank string.
647 memset(_converted, 0, _new_length);
648 const UTF16 *temp_in = (const UTF16 *)utf16_input;
649 UTF8 *temp_out = _converted;
650 _result = ConvertUTF16toUTF8(&temp_in, temp_in + _orig_length,
651 &temp_out, temp_out + _new_length, lenientConversion);
// releases the UTF-8 array that the constructors allocated with new[].
654 transcode_to_utf8::~transcode_to_utf8()
656 delete [] _converted;
660 int transcode_to_utf8::length() const
661 { return int(strlen((char *)_converted)); }
663 transcode_to_utf8::operator astring() const
664 { return astring((char *)_converted); }
668 null_transcoder::null_transcoder(const char *utf8_input, bool make_own_copy)
669 : _make_own_copy(make_own_copy),
670 _converted(make_own_copy? new UTF8[strlen(utf8_input) + 1]
671 : (const UTF8 *)utf8_input)
673 if (_make_own_copy) {
674 strcpy((char *)_converted, utf8_input);
678 null_transcoder::null_transcoder(const astring &utf8_input, bool make_own_copy)
679 : _make_own_copy(make_own_copy),
680 _converted(make_own_copy? new UTF8[utf8_input.length() + 1]
681 : (const UTF8 *)utf8_input.s())
683 if (_make_own_copy) {
684 strcpy((char *)_converted, utf8_input.s());
688 int null_transcoder::length() const
689 { return int(strlen((char *)_converted)); }