From e2b2c974b9b441cfe65e9b3530807d13bd73fef0 Mon Sep 17 00:00:00 2001 From: Chris Koeritz Date: Thu, 7 Mar 2013 17:49:57 -0500 Subject: [PATCH] refactoring the directories, especially for scripts; moved gimp stuff out to db since seems more appropriate. added a couple new things for burning blu-ray discs. responded to warnings reported by eclipse, hopefully correctly. --- .../patterns}/gimp/batch-whiteboard-clean.scm | 0 .../gimp/whiteboard_cleaner_notes.txt | 0 .../mysql/opensim/db_queries_for_opensim.txt | 8 +- doc/perl_tools.html | 9 - nucleus/library/basis/utf_conversion.cpp | 1367 +++++++++-------- nucleus/library/processes/state_machine.cpp | 47 +- .../tests_structures/test_hash_table.cpp | 5 +- .../tests_structures/test_int_hash.cpp | 2 +- .../library/textual/string_manipulation.cpp | 2 +- nucleus/library/timely/stopwatch.cpp | 2 +- nucleus/tools/dependency_tool/ifparser.cpp | 1 + nucleus/tools/dependency_tool/makedep.cpp | 1 + nucleus/tools/dependency_tool/parse.cpp | 4 + nucleus/tools/dependency_tool/pr.cpp | 2 +- scripts/core/common.alias | 1 + scripts/pictures/shrink_pics.sh | 17 + scripts/rip_burn/blu_burn.sh | 33 + scripts/rip_burn/blu_image.sh | 31 + 18 files changed, 869 insertions(+), 663 deletions(-) rename {scripts => database/patterns}/gimp/batch-whiteboard-clean.scm (100%) rename {scripts => database/patterns}/gimp/whiteboard_cleaner_notes.txt (100%) create mode 100644 scripts/pictures/shrink_pics.sh create mode 100644 scripts/rip_burn/blu_burn.sh create mode 100644 scripts/rip_burn/blu_image.sh diff --git a/scripts/gimp/batch-whiteboard-clean.scm b/database/patterns/gimp/batch-whiteboard-clean.scm similarity index 100% rename from scripts/gimp/batch-whiteboard-clean.scm rename to database/patterns/gimp/batch-whiteboard-clean.scm diff --git a/scripts/gimp/whiteboard_cleaner_notes.txt b/database/patterns/gimp/whiteboard_cleaner_notes.txt similarity index 100% rename from scripts/gimp/whiteboard_cleaner_notes.txt rename to 
database/patterns/gimp/whiteboard_cleaner_notes.txt diff --git a/database/patterns/mysql/opensim/db_queries_for_opensim.txt b/database/patterns/mysql/opensim/db_queries_for_opensim.txt index 27dbde43..cc43c7a7 100644 --- a/database/patterns/mysql/opensim/db_queries_for_opensim.txt +++ b/database/patterns/mysql/opensim/db_queries_for_opensim.txt @@ -12,9 +12,6 @@ find all the tables that have a CreatorID column: => yields assets inventoryitems prims primitems as tables matching the column. -(note: replacing all creator ids like below still did not secure total ownership -to fred; some things retained their original creator. how!?) - ============== replace all the creator ids with fred's id on ducky: @@ -39,5 +36,8 @@ if you have a more selective update to do, try replacing using a pattern: ============== - +note: replacing all creator ids like above still does not secure total +ownership for everything in inventory or even in the sim; nested objects in +inventory do not seem to be affected, and the contents of objects nested in +live objects also are not affected. diff --git a/doc/perl_tools.html b/doc/perl_tools.html index 4c0d2ef5..9954f765 100644 --- a/doc/perl_tools.html +++ b/doc/perl_tools.html @@ -224,15 +224,6 @@ subdirectory is created under the home directory as a storage place for the generated files. -

-

goodbye.pl

-
A logout script for exiting from a shell; it prints -a message using the nechung oracle for the user's benefit and starts a -byejob before exiting. The byejob will wait for a few seconds, then -clear the screen and print another fortune. It attempts to leave the -screen looking like a -standard login, but with an extra fortune.
-

new_sig.pl

Generates a signature file from the nechung diff --git a/nucleus/library/basis/utf_conversion.cpp b/nucleus/library/basis/utf_conversion.cpp index 11c127e9..807ecb60 100644 --- a/nucleus/library/basis/utf_conversion.cpp +++ b/nucleus/library/basis/utf_conversion.cpp @@ -1,17 +1,17 @@ /*****************************************************************************\ * * -* Name : utf_conversion * -* Author : Unicode, Inc. (C conversion functions) * -* Author : Chris Koeritz (C++ conversion classes) * -* * -******************************************************************************* -* Copyright (c) 2006-$now By Author. This program is free software; you can * -* redistribute it and/or modify it under the terms of the GNU General Public * -* License as published by the Free Software Foundation; either version 2 of * -* the License or (at your option) any later version. This is online at: * -* http://www.fsf.org/copyleft/gpl.html * -* Please send any updates to: fred@gruntose.com * -\*****************************************************************************/ + * Name : utf_conversion * + * Author : Unicode, Inc. (C conversion functions) * + * Author : Chris Koeritz (C++ conversion classes) * + * * + ******************************************************************************* + * Copyright (c) 2006-$now By Author. This program is free software; you can * + * redistribute it and/or modify it under the terms of the GNU General Public * + * License as published by the Free Software Foundation; either version 2 of * + * the License or (at your option) any later version. This is online at: * + * http://www.fsf.org/copyleft/gpl.html * + * Please send any updates to: fred@gruntose.com * + \*****************************************************************************/ //copyright below is relevant to UTF conversion methods only. /* @@ -38,21 +38,21 @@ /* --------------------------------------------------------------------- - Conversions between UTF32, UTF-16, and UTF-8. 
Source code file. - Author: Mark E. Davis, 1994. - Rev History: Rick McGowan, fixes & updates May 2001. - Sept 2001: fixed const & error conditions per - mods suggested by S. Parent & A. Lillich. - June 2002: Tim Dodd added detection and handling of incomplete - source sequences, enhanced error detection, added casts - to eliminate compiler warnings. - July 2003: slight mods to back out aggressive FFFE detection. - Jan 2004: updated switches in from-UTF8 conversions. - Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. - See the header file "ConvertUTF.h" for complete documentation. + See the header file "ConvertUTF.h" for complete documentation. 
------------------------------------------------------------------------- */ + ------------------------------------------------------------------------ */ #include "astring.h" #include "utf_conversion.h" @@ -60,635 +60,740 @@ #include #include #ifdef CVTUTF_DEBUG - #include +#include #endif namespace basis { -static const int halfShift = 10; /* used for shifting by 10 bits */ + static const int halfShift = 10; /* used for shifting by 10 bits */ -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; + static const UTF32 halfBase = 0x0010000UL; + static const UTF32 halfMask = 0x3FFUL; #define UNI_SUR_HIGH_START (UTF32)0xD800 #define UNI_SUR_HIGH_END (UTF32)0xDBFF #define UNI_SUR_LOW_START (UTF32)0xDC00 #define UNI_SUR_LOW_END (UTF32)0xDFFF -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF32toUTF16 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF32* source = *sourceStart; - UTF16* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch; - if (target >= targetEnd) { - result = targetExhausted; break; - } - ch = *source++; - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - *target++ = (UTF16)ch; /* normal case */ - } - } else if (ch > UNI_MAX_LEGAL_UTF32) { - if (flags == strictConversion) { - result = sourceIllegal; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. 
*/ - if (target + 1 >= targetEnd) { - --source; /* Back up source pointer! */ - result = targetExhausted; break; - } - ch -= halfBase; - *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); - *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF16toUTF32 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF16* source = *sourceStart; - UTF32* target = *targetStart; - UTF32 ch, ch2; - while (source < sourceEnd) { - const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ - ch = *source++; - /* If we have a surrogate pair, convert to UTF32 first. */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - /* If the 16 bits following the high surrogate are in the source buffer... */ - if (source < sourceEnd) { - ch2 = *source; - /* If it's a low surrogate, convert to UTF32. */ - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) - + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++source; - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } else { /* We don't have the 16 bits following the high surrogate. */ - --source; /* return to the high surrogate */ - result = sourceExhausted; - break; - } - } else if (flags == strictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } - if (target >= targetEnd) { - source = oldSource; /* Back up source pointer! 
*/ - result = targetExhausted; break; - } - *target++ = ch; - } - *sourceStart = source; - *targetStart = target; + /* --------------------------------------------------------------------- */ + + ConversionResult ConvertUTF32toUTF16(const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) + { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; + break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! 
*/ + result = targetExhausted; + break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START ); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START ); + } + } + *sourceStart = source; + *targetStart = target; + return result; + } + + /* --------------------------------------------------------------------- */ + + ConversionResult ConvertUTF16toUTF32(const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) + { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START ) << halfShift) + (ch2 - UNI_SUR_LOW_START ) + + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! 
*/ + result = targetExhausted; + break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; #ifdef CVTUTF_DEBUG -if (result == sourceIllegal) { - fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); - fflush(stderr); -} + if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); + } #endif - return result; -} - -/* --------------------------------------------------------------------- */ - -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. - */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - -/* - * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed - * into the first byte, depending on how many bytes follow. There are - * as many entries in this table as there are UTF-8 sequence types. 
- * (I.e., one byte sequence, two byte... etc.). Remember that sequencs - * for *legal* UTF-8 will be 4 or fewer bytes total. - */ -static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - -/* --------------------------------------------------------------------- */ - -/* The interface converts a whole buffer to avoid function-call overhead. - * Constants have been gathered. Loops & conditionals have been removed as - * much as possible for efficiency, in favor of drop-through switches. - * (See "Note A" at the bottom of the file for equivalent code.) - * If your compiler supports it, the "isLegalUTF8" call can be turned - * into an inline function. - */ - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF16toUTF8 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF16* source = *sourceStart; - UTF8* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch; - unsigned short bytesToWrite = 0; - const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; - const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ - ch = *source++; - /* If we have a surrogate pair, convert to UTF32 first. */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - /* If the 16 bits following the high surrogate are in the source buffer... */ - if (source < sourceEnd) { - UTF32 ch2 = *source; - /* If it's a low surrogate, convert to UTF32. */ - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) - + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++source; - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } else { /* We don't have the 16 bits following the high surrogate. 
*/ - --source; /* return to the high surrogate */ - result = sourceExhausted; - break; - } - } else if (flags == strictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } - /* Figure out how many bytes the result will require */ - if (ch < (UTF32)0x80) { bytesToWrite = 1; - } else if (ch < (UTF32)0x800) { bytesToWrite = 2; - } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; - } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; - } else { bytesToWrite = 3; - ch = UNI_REPLACEMENT_CHAR; - } - - target += bytesToWrite; - if (target > targetEnd) { - source = oldSource; /* Back up source pointer! */ - target -= bytesToWrite; result = targetExhausted; break; - } - switch (bytesToWrite) { /* note: everything falls through. */ - case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); - } - target += bytesToWrite; - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -/* - * Utility routine to tell whether a sequence of bytes is legal UTF-8. - * This must be called with the length pre-determined by the first byte. - * If not calling this from ConvertUTF8to*, then the length can be set by: - * length = trailingBytesForUTF8[*source]+1; - * and the sequence is illegal right away if there aren't that many bytes - * available. - * If presented with a length > 4, this returns false. The Unicode - * definition of UTF-8 goes up to 4-byte sequences. 
- */ - -static Booleano isLegalUTF8(const UTF8 *source, int length) { - UTF8 a; - const UTF8 *srcptr = source+length; - switch (length) { - default: return false; - /* Everything else falls through when "true"... */ - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 2: if ((a = (*--srcptr)) > 0xBF) return false; - - switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return false; break; - case 0xED: if (a > 0x9F) return false; break; - case 0xF0: if (a < 0x90) return false; break; - case 0xF4: if (a > 0x8F) return false; break; - default: if (a < 0x80) return false; - } - - case 1: if (*source >= 0x80 && *source < 0xC2) return false; - } - if (*source > 0xF4) return false; - return true; -} - -/* --------------------------------------------------------------------- */ - -/* - * Exported function to return whether a UTF-8 sequence is legal or not. - * This is not used here; it's just exported. - */ -Booleano isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { - int length = trailingBytesForUTF8[*source]+1; - if (source+length > sourceEnd) { - return false; - } - return isLegalUTF8(source, length); -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF8toUTF16 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF8* source = *sourceStart; - UTF16* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (source + extraBytesToRead >= sourceEnd) { - result = sourceExhausted; break; - } - /* Do this check whether lenient or strict */ - if (! isLegalUTF8(source, extraBytesToRead+1)) { - result = sourceIllegal; - break; - } - /* - * The cases all fall through. 
See "Note A" below. - */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (target >= targetEnd) { - source -= (extraBytesToRead+1); /* Back up source pointer! */ - result = targetExhausted; break; - } - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - result = sourceIllegal; - break; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - *target++ = (UTF16)ch; /* normal case */ - } - } else if (ch > UNI_MAX_UTF16) { - if (flags == strictConversion) { - result = sourceIllegal; - source -= (extraBytesToRead+1); /* return to the start */ - break; /* Bail out; shouldn't continue */ - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - if (target + 1 >= targetEnd) { - source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ - result = targetExhausted; break; - } - ch -= halfBase; - *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); - *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF32toUTF8 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF32* source = *sourceStart; - UTF8* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch; - unsigned short bytesToWrite = 0; - const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; - ch = *source++; - if (flags == strictConversion ) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } - /* - * Figure out how many bytes the result will require. Turn any - * illegally large UTF32 things (> Plane 17) into replacement chars. - */ - if (ch < (UTF32)0x80) { bytesToWrite = 1; - } else if (ch < (UTF32)0x800) { bytesToWrite = 2; - } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; - } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; - } else { bytesToWrite = 3; - ch = UNI_REPLACEMENT_CHAR; - result = sourceIllegal; - } - - target += bytesToWrite; - if (target > targetEnd) { - --source; /* Back up source pointer! */ - target -= bytesToWrite; result = targetExhausted; break; - } - switch (bytesToWrite) { /* note: everything falls through. 
*/ - case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); - } - target += bytesToWrite; - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF8toUTF32 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF8* source = *sourceStart; - UTF32* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (source + extraBytesToRead >= sourceEnd) { - result = sourceExhausted; break; - } - /* Do this check whether lenient or strict */ - if (! isLegalUTF8(source, extraBytesToRead+1)) { - result = sourceIllegal; - break; - } - /* - * The cases all fall through. See "Note A" below. - */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; - case 4: ch += *source++; ch <<= 6; - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (target >= targetEnd) { - source -= (extraBytesToRead+1); /* Back up the source pointer! */ - result = targetExhausted; break; - } - if (ch <= UNI_MAX_LEGAL_UTF32) { - /* - * UTF-16 surrogate values are illegal in UTF-32, and anything - * over Plane 17 (> 0x10FFFF) is illegal. 
- */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - result = sourceIllegal; - break; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - *target++ = ch; - } - } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ - result = sourceIllegal; - *target++ = UNI_REPLACEMENT_CHAR; - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- - - Note A. - The fall-through switches in UTF-8 reading code save a - temp variable, some decrements & conditionals. The switches - are equivalent to the following loop: - { - int tmpBytesToRead = extraBytesToRead+1; - do { - ch += *source++; - --tmpBytesToRead; - if (tmpBytesToRead) ch <<= 6; - } while (tmpBytesToRead > 0); - } - In UTF-8 writing code, the switches on "bytesToWrite" are - similarly unrolled loops. - - --------------------------------------------------------------------- */ + return result; + } + + /* --------------------------------------------------------------------- */ + + /* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. 
+   */
+  static const char trailingBytesForUTF8[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
+
+  /*
+   * Magic values subtracted from the accumulated value during UTF-8 decoding.
+   * Indexed by the number of trailing bytes (0 through 5), so the table
+   * holds one entry for each possible trailing-byte count.
+   */
+  static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+      0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+  /*
+   * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+   * into the first byte, depending on how many bytes follow.  It is indexed
+   * by the total number of bytes in the sequence, so there are as many
+   * entries as there are UTF-8 sequence types.  Remember that sequences
+   * for *legal* UTF-8 will be 4 or fewer bytes total.
+   */
+  static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+  /* --------------------------------------------------------------------- */
+
+  /* The interface converts a whole buffer to avoid function-call overhead.
+   * Constants have been gathered. Loops & conditionals have been removed as
+   * much as possible for efficiency, in favor of drop-through switches.
+   * (See "Note A" at the bottom of the file for equivalent code.)
+   * If your compiler supports it, the "isLegalUTF8" call can be turned
+   * into an inline function.
+   */
+
+  /* --------------------------------------------------------------------- */
+  /*
+   * Converts the UTF-16 code units in [*sourceStart, sourceEnd) into UTF-8
+   * bytes stored in [*targetStart, targetEnd).  On return *sourceStart and
+   * *targetStart are advanced to the positions reached, so a caller can
+   * resume from there.
+   *
+   * Returns:
+   *   conversionOK    - every source unit was converted.
+   *   sourceExhausted - the input ends right after a high surrogate;
+   *                     *sourceStart is left on that surrogate.
+   *   sourceIllegal   - flags == strictConversion and an unpaired surrogate
+   *                     was met; *sourceStart is left on the offending unit.
+   *   targetExhausted - the next character does not fit; *sourceStart is
+   *                     left at the start of that character and none of its
+   *                     bytes are written.
+   *
+   * When flags is not strictConversion, unpaired surrogates are not rejected;
+   * they are encoded like any other value below 0x10000.
+   */
+  ConversionResult ConvertUTF16toUTF8(const UTF16** sourceStart, const UTF16* sourceEnd,
+      UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags)
+  {
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch;
+      unsigned short bytesToWrite = 0;
+      const UTF32 byteMask = 0xBF; /* (ch | byteMark) & byteMask gives 10xxxxxx: */
+      const UTF32 byteMark = 0x80; /* the low six bits of ch under a continuation marker */
+      const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+      ch = *source++;
+      /* If we have a surrogate pair, convert to UTF32 first. */
+      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+        /* If the 16 bits following the high surrogate are in the source buffer... */
+        if (source < sourceEnd) {
+          UTF32 ch2 = *source;
+          /* If it's a low surrogate, convert to UTF32. */
+          if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+            ch = ((ch - UNI_SUR_HIGH_START ) << halfShift) + (ch2 - UNI_SUR_LOW_START )
+                + halfBase;
+            ++source;
+          } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+            --source; /* return to the illegal value itself */
+            result = sourceIllegal;
+            break;
+          }
+        } else { /* We don't have the 16 bits following the high surrogate. */
+          --source; /* return to the high surrogate */
+          result = sourceExhausted;
+          break;
+        }
+      } else if (flags == strictConversion) {
+        /* UTF-16 surrogate values are illegal in UTF-32 */
+        if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+          --source; /* return to the illegal value itself */
+          result = sourceIllegal;
+          break;
+        }
+      }
+      /* Figure out how many bytes the result will require; values of
+       * 0x110000 and above are replaced by UNI_REPLACEMENT_CHAR. */
+      if (ch < (UTF32)0x80) {
+        bytesToWrite = 1;
+      } else if (ch < (UTF32)0x800) {
+        bytesToWrite = 2;
+      } else if (ch < (UTF32)0x10000) {
+        bytesToWrite = 3;
+      } else if (ch < (UTF32)0x110000) {
+        bytesToWrite = 4;
+      } else {
+        bytesToWrite = 3;
+        ch = UNI_REPLACEMENT_CHAR;
+      }
+
+      target += bytesToWrite; /* jump past the sequence; it is filled in back to front below */
+      if (target > targetEnd) {
+        source = oldSource; /* Back up source pointer! */
+        target -= bytesToWrite;
+        result = targetExhausted;
+        break;
+      }
+      switch (bytesToWrite) { /* note: everything falls through. */
+        case 4:
+          *--target = (UTF8)((ch | byteMark) & byteMask);
+          ch >>= 6;
+          // no break
+        case 3:
+          *--target = (UTF8)((ch | byteMark) & byteMask);
+          ch >>= 6;
+          // no break.
+        case 2:
+          *--target = (UTF8)((ch | byteMark) & byteMask);
+          ch >>= 6;
+          // no break.
+        case 1:
+          *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); /* lead byte: remaining bits plus length mark */
+          // no break.
+      }
+      target += bytesToWrite; /* the switch walked back to the lead byte; step past the sequence again */
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+   * This must be called with the length pre-determined by the first byte.
+   * If not calling this from ConvertUTF8to*, then the length can be set by:
+   *  length = trailingBytesForUTF8[*source]+1;
+   * and the sequence is illegal right away if there aren't that many bytes
+   * available.
+   * If presented with a length > 4, this returns false. The Unicode
+   * definition of UTF-8 goes up to 4-byte sequences.
+   */
+
+  /*
+   * source points at the lead byte and length is the total byte count of
+   * the sequence (lead byte included).  Returns true only when:
+   *   - length is 1 through 4,
+   *   - bytes three and four (when present) lie in 0x80..0xBF,
+   *   - the second byte (when present) lies in 0x80..0xBF, narrowed further
+   *     for the lead bytes 0xE0, 0xED, 0xF0 and 0xF4,
+   *   - the lead byte is neither in 0x80..0xC1 nor above 0xF4.
+   * The bytes are examined from the last one back to the first.
+   */
+  static Booleano isLegalUTF8(const UTF8 *source, int length)
+  {
+    UTF8 a;
+    const UTF8 *srcptr = source + length;
+    switch (length) {
+      /* Everything else falls through when "true"... */
+      case 4: {
+        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+      }
+      /* no break */
+      case 3: {
+        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+      }
+      /* no break */
+      case 2: {
+        if ((a = (*--srcptr)) > 0xBF) return false;
+
+        switch (*source) {
+          /* no fall-through in this inner switch */
+          case 0xE0:
+            if (a < 0xA0) return false;
+            break;
+          case 0xED:
+            if (a > 0x9F) return false;
+            break;
+          case 0xF0:
+            if (a < 0x90) return false;
+            break;
+          case 0xF4:
+            if (a > 0x8F) return false;
+            break;
+          default:
+            if (a < 0x80) return false;
+            break;
+        }
+      }
+      /* no break */
+      case 1: {
+        if (*source >= 0x80 && *source < 0xC2) return false;
+        /*
+         * this break is required: without it every accepted length runs on
+         * into the default label below and the function rejects all input.
+         */
+        break;
+      }
+      default: {
+        /* any length outside 1..4 is never legal. */
+        return false;
+      }
+    }
+    if (*source > 0xF4) return false;
+    return true;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Exported function to return whether a UTF-8 sequence is legal or not.
+   * This is not used here; it's just exported.
+   */
+  /*
+   * The sequence length is looked up from the lead byte at *source; the
+   * result is false when that many bytes are not available before sourceEnd.
+   */
+  Booleano isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
+  {
+    int length = trailingBytesForUTF8[*source] + 1;
+    if (source + length > sourceEnd) {
+      return false;
+    }
+    return isLegalUTF8(source, length);
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) into UTF-16 units
+   * stored in [*targetStart, targetEnd).  On return, *sourceStart and
+   * *targetStart are moved to the positions where conversion stopped.
+   * Returns conversionOK, sourceExhausted (a sequence is cut off by
+   * sourceEnd), sourceIllegal (isLegalUTF8 rejected the sequence, or, when
+   * flags is strictConversion, the value is a surrogate or is above
+   * UNI_MAX_UTF16) or targetExhausted (no room; the source is backed up to
+   * the start of the character).  With other flags values the offending
+   * value is replaced by UNI_REPLACEMENT_CHAR instead.
+   */
+  ConversionResult ConvertUTF8toUTF16(const UTF8** sourceStart, const UTF8* sourceEnd,
+      UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags)
+  {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch = 0;
+      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+      if (source + extraBytesToRead >= sourceEnd) {
+        result = sourceExhausted;
+        break;
+      }
+      /* Do this check whether lenient or strict */
+      if (!isLegalUTF8(source, extraBytesToRead + 1)) {
+        result = sourceIllegal;
+        break;
+      }
+      /*
+       * The cases all fall through. See "Note A" below.
+       */
+      switch (extraBytesToRead) {
+        case 5:
+          ch += *source++;
+          ch <<= 6; /* remember, illegal UTF-8 */
+          /* no break */
+        case 4:
+          ch += *source++;
+          ch <<= 6; /* remember, illegal UTF-8 */
+          /* no break */
+        case 3:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 2:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 1:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 0:
+          ch += *source++;
+          /* no break */
+      }
+      /* remove the lead-byte and continuation-byte marker bits that were summed in above. */
+      ch -= offsetsFromUTF8[extraBytesToRead];
+
+      if (target >= targetEnd) {
+        source -= (extraBytesToRead + 1); /* Back up source pointer! */
+        result = targetExhausted;
+        break;
+      }
+      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+        /* UTF-16 surrogate values are illegal in UTF-32 */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+          if (flags == strictConversion) {
+            source -= (extraBytesToRead + 1); /* return to the illegal value itself */
+            result = sourceIllegal;
+            break;
+          } else {
+            *target++ = UNI_REPLACEMENT_CHAR;
+          }
+        } else {
+          *target++ = (UTF16)ch; /* normal case */
+        }
+      } else if (ch > UNI_MAX_UTF16) {
+        if (flags == strictConversion) {
+          result = sourceIllegal;
+          source -= (extraBytesToRead + 1); /* return to the start */
+          break; /* Bail out; shouldn't continue */
+        } else {
+          *target++ = UNI_REPLACEMENT_CHAR;
+        }
+      } else {
+        /* target is a character in range 0xFFFF - 0x10FFFF. */
+        /* two units are about to be stored, so both must fit. */
+        if (target + 1 >= targetEnd) {
+          source -= (extraBytesToRead + 1); /* Back up source pointer! */
+          result = targetExhausted;
+          break;
+        }
+        ch -= halfBase;
+        *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START );
+        *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START );
+      }
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Converts the UTF-32 values in [*sourceStart, sourceEnd) into UTF-8 bytes
+   * stored in [*targetStart, targetEnd).  On return, *sourceStart and
+   * *targetStart are moved to the positions where conversion stopped.
+   * A surrogate value stops the conversion with sourceIllegal only when
+   * flags is strictConversion.  A value above UNI_MAX_LEGAL_UTF32 is written
+   * as UNI_REPLACEMENT_CHAR and sets the result to sourceIllegal without
+   * stopping the loop.  targetExhausted is returned when the next character
+   * does not fit; the source is backed up to that character.
+   */
+  ConversionResult ConvertUTF32toUTF8(const UTF32** sourceStart, const UTF32* sourceEnd,
+      UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags)
+  {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch;
+      unsigned short bytesToWrite = 0;
+      const UTF32 byteMask = 0xBF;
+      const UTF32 byteMark = 0x80;
+      ch = *source++;
+      if (flags == strictConversion) {
+        /* UTF-16 surrogate values are illegal in UTF-32 */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+          --source; /* return to the illegal value itself */
+          result = sourceIllegal;
+          break;
+        }
+      }
+      /*
+       * Figure out how many bytes the result will require. Turn any
+       * illegally large UTF32 things (> Plane 17) into replacement chars.
+       */
+      if (ch < (UTF32)0x80) {
+        bytesToWrite = 1;
+      } else if (ch < (UTF32)0x800) {
+        bytesToWrite = 2;
+      } else if (ch < (UTF32)0x10000) {
+        bytesToWrite = 3;
+      } else if (ch <= UNI_MAX_LEGAL_UTF32) {
+        bytesToWrite = 4;
+      } else {
+        bytesToWrite = 3;
+        ch = UNI_REPLACEMENT_CHAR;
+        result = sourceIllegal;
+      }
+
+      /* jump to the end of this character's slot; the bytes are stored back to front. */
+      target += bytesToWrite;
+      if (target > targetEnd) {
+        --source; /* Back up source pointer! */
+        target -= bytesToWrite;
+        result = targetExhausted;
+        break;
+      }
+      switch (bytesToWrite) { /* note: everything falls through. */
+        case 4:
+          *--target = (UTF8)((ch | byteMark) & byteMask);
+          ch >>= 6;
+          /* no break */
+        case 3:
+          *--target = (UTF8)((ch | byteMark) & byteMask);
+          ch >>= 6;
+          /* no break */
+        case 2:
+          *--target = (UTF8)((ch | byteMark) & byteMask);
+          ch >>= 6;
+          /* no break */
+        case 1:
+          *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
+          /* no break */
+      }
+      target += bytesToWrite;
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) into UTF-32 values
+   * stored in [*targetStart, targetEnd).  On return, *sourceStart and
+   * *targetStart are moved to the positions where conversion stopped.
+   * Returns sourceExhausted when a sequence is cut off by sourceEnd,
+   * sourceIllegal when isLegalUTF8 rejects a sequence (or a surrogate is
+   * decoded while flags is strictConversion), and targetExhausted when the
+   * target is full.  A value above UNI_MAX_LEGAL_UTF32 is stored as
+   * UNI_REPLACEMENT_CHAR and sets the result to sourceIllegal without
+   * stopping the loop.
+   */
+  ConversionResult ConvertUTF8toUTF32(const UTF8** sourceStart, const UTF8* sourceEnd,
+      UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags)
+  {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF32* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch = 0;
+      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+      if (source + extraBytesToRead >= sourceEnd) {
+        result = sourceExhausted;
+        break;
+      }
+      /* Do this check whether lenient or strict */
+      if (!isLegalUTF8(source, extraBytesToRead + 1)) {
+        result = sourceIllegal;
+        break;
+      }
+      /*
+       * The cases all fall through. See "Note A" below.
+       */
+      switch (extraBytesToRead) {
+        case 5:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 4:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 3:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 2:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 1:
+          ch += *source++;
+          ch <<= 6;
+          /* no break */
+        case 0:
+          ch += *source++;
+          /* no break */
+      }
+      ch -= offsetsFromUTF8[extraBytesToRead];
+
+      if (target >= targetEnd) {
+        source -= (extraBytesToRead + 1); /* Back up the source pointer! */
+        result = targetExhausted;
+        break;
+      }
+      if (ch <= UNI_MAX_LEGAL_UTF32) {
+        /*
+         * UTF-16 surrogate values are illegal in UTF-32, and anything
+         * over Plane 17 (> 0x10FFFF) is illegal.
+         */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+          if (flags == strictConversion) {
+            source -= (extraBytesToRead + 1); /* return to the illegal value itself */
+            result = sourceIllegal;
+            break;
+          } else {
+            *target++ = UNI_REPLACEMENT_CHAR;
+          }
+        } else {
+          *target++ = ch;
+        }
+      } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+        result = sourceIllegal;
+        *target++ = UNI_REPLACEMENT_CHAR;
+      }
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+  }
+
+  /* ---------------------------------------------------------------------
+
+   Note A.
+   The fall-through switches in UTF-8 reading code save a
+   temp variable, some decrements & conditionals. The switches
+   are equivalent to the following loop:
+   {
+     int tmpBytesToRead = extraBytesToRead+1;
+     do {
+       ch += *source++;
+       --tmpBytesToRead;
+       if (tmpBytesToRead) ch <<= 6;
+     } while (tmpBytesToRead > 0);
+   }
+   In UTF-8 writing code, the switches on "bytesToWrite" are
+   similarly unrolled loops.
+
+   --------------------------------------------------------------------- */
 //////////////
-
 #ifdef __cplusplus
-transcode_to_utf16::transcode_to_utf16(const char *utf8_input)
-: _orig_length(int(strlen(utf8_input)) + 1),
-  _converted(new UTF16[_orig_length + 1])
-    // we don't ever expect the string to get longer going to the larger data
-    // type, so the current length should be enough.
-{
-  _result = conversionOK;
-  if (_orig_length == 1) {
-    // no length, so only provide a blank string.
-    _converted[0] = 0;
-    return;
-  }
-  memset((abyte *)_converted, 0, 2 * _orig_length);
-  // we use these temporary pointers since the converter resets the source
-  // and target pointers to the end of the conversion. the same pattern
-  // is used in the code below.
-  const UTF8 *temp_in = (const UTF8 *)utf8_input;
-  UTF16 *temp_out = _converted;
-  _result = ConvertUTF8toUTF16(&temp_in, temp_in + _orig_length,
-      &temp_out, temp_out + _orig_length, lenientConversion);
-}
-
-transcode_to_utf16::transcode_to_utf16(const astring &utf8_input)
-: _orig_length(utf8_input.length() + 1),
-  _converted(new UTF16[_orig_length])
-{
-  _result = conversionOK;
-  if (_orig_length == 1) {
-    // no length, so only provide a blank string.
-    _converted[0] = 0;
-    return;
-  }
-  memset((abyte *)_converted, 0, 2 * _orig_length);
-  const UTF8 *temp_in = (const UTF8 *)utf8_input.observe();
-  UTF16 *temp_out = _converted;
-  _result = ConvertUTF8toUTF16(&temp_in, temp_in + _orig_length,
-      &temp_out, temp_out + _orig_length, lenientConversion);
-}
-
-transcode_to_utf16::~transcode_to_utf16()
-{
-  delete [] _converted;
-  _converted = NIL;
-}
-
-int transcode_to_utf16::length() const
-{ return int(wcslen((wchar_t *)_converted)); }
+  // converts the zero-terminated utf-8 string into a newly allocated utf-16
+  // buffer.  _orig_length counts the terminator, so the terminator itself is
+  // run through the converter too.  the converter's status lands in _result.
+  transcode_to_utf16::transcode_to_utf16(const char *utf8_input)
+  : _orig_length(int(strlen(utf8_input)) + 1), _converted(new UTF16[_orig_length + 1])
+  // we don't ever expect the string to get longer going to the larger data
+  // type, so the current length should be enough.
+  {
+    _result = conversionOK;
+    if (_orig_length == 1) {
+      // no length, so only provide a blank string.
+      _converted[0] = 0;
+      return;
+    }
+    memset((abyte *)_converted, 0, 2 * _orig_length);
+    // we use these temporary pointers since the converter resets the source
+    // and target pointers to the end of the conversion. the same pattern
+    // is used in the code below.
+    const UTF8 *temp_in = (const UTF8 *)utf8_input;
+    UTF16 *temp_out = _converted;
+    _result = ConvertUTF8toUTF16(&temp_in, temp_in + _orig_length, &temp_out,
+        temp_out + _orig_length, lenientConversion);
+  }
+
+  // same conversion as above, but taking the utf-8 text from an astring.
+  transcode_to_utf16::transcode_to_utf16(const astring &utf8_input)
+  : _orig_length(utf8_input.length() + 1), _converted(new UTF16[_orig_length])
+  {
+    _result = conversionOK;
+    if (_orig_length == 1) {
+      // no length, so only provide a blank string.
+      _converted[0] = 0;
+      return;
+    }
+    memset((abyte *)_converted, 0, 2 * _orig_length);
+    const UTF8 *temp_in = (const UTF8 *)utf8_input.observe();
+    UTF16 *temp_out = _converted;
+    _result = ConvertUTF8toUTF16(&temp_in, temp_in + _orig_length, &temp_out,
+        temp_out + _orig_length, lenientConversion);
+  }
+
+  transcode_to_utf16::~transcode_to_utf16()
+  {
+    delete[] _converted;
+    _converted = NIL;
+  }
+
+  // number of units before the first zero unit in the converted buffer.
+  // NOTE(review): presumes UTF16 and wchar_t have the same width -- confirm
+  // for platforms where wchar_t is wider than 16 bits.
+  int transcode_to_utf16::length() const
+  {
+    return int(wcslen((wchar_t *)_converted));
+  }
 //////////////
-transcode_to_utf8::transcode_to_utf8(const UTF16 *utf16_input)
-: _orig_length(int(wcslen((const wchar_t *)utf16_input))),
-  _new_length(_orig_length * 2 + _orig_length / 2 + 1),
-    // this is just an estimate. it may be appropriate most of the time.
-    // whatever doesn't fit will get truncated.
-  _converted(new UTF8[_new_length])
-{
-  _result = conversionOK;
-  if (_orig_length == 0) {
-    // no length, so only provide a blank string.
-    _converted[0] = 0;
-    return;
-  }
-  memset(_converted, 0, _new_length);
-  const UTF16 *temp_in = (const UTF16 *)utf16_input;
-  UTF8 *temp_out = _converted;
-  _result = ConvertUTF16toUTF8(&temp_in, temp_in + _orig_length,
-      &temp_out, temp_out + _new_length, lenientConversion);
-}
-
-transcode_to_utf8::transcode_to_utf8(const wchar_t *utf16_input)
-: _orig_length(int(wcslen(utf16_input))),
-  _new_length(_orig_length * 2 + _orig_length / 2 + 1),
-    // this is just an estimate. it may be appropriate most of the time.
-    // whatever doesn't fit will get truncated.
-  _converted(new UTF8[_new_length > 0 ? _new_length : 1])
-{
-  _result = conversionOK;
-  if (_orig_length == 0) {
-    // no length, so only provide a blank string.
-    _converted[0] = 0;
-    return;
-  }
-  memset(_converted, 0, _new_length);
-  const UTF16 *temp_in = (const UTF16 *)utf16_input;
-  UTF8 *temp_out = _converted;
-  _result = ConvertUTF16toUTF8(&temp_in, temp_in + _orig_length,
-      &temp_out, temp_out + _new_length, lenientConversion);
-}
-
-transcode_to_utf8::~transcode_to_utf8()
-{
-  delete [] _converted;
-  _converted = NIL;
-}
-
-int transcode_to_utf8::length() const
-{ return int(strlen((char *)_converted)); }
-
-transcode_to_utf8::operator astring() const
-{ return astring((char *)_converted); }
+  // converts the zero-terminated utf-16 string into a newly allocated utf-8
+  // buffer of _new_length bytes.  the converter's status lands in _result.
+  transcode_to_utf8::transcode_to_utf8(const UTF16 *utf16_input)
+  : _orig_length(int(wcslen((const wchar_t *)utf16_input))),
+    _new_length(_orig_length * 2 + _orig_length / 2 + 1),
+    // this is just an estimate. it may be appropriate most of the time.
+    // whatever doesn't fit will get truncated.
+    _converted(new UTF8[_new_length])
+  {
+    _result = conversionOK;
+    if (_orig_length == 0) {
+      // no length, so only provide a blank string.
+      _converted[0] = 0;
+      return;
+    }
+    memset(_converted, 0, _new_length);
+    const UTF16 *temp_in = (const UTF16 *)utf16_input;
+    UTF8 *temp_out = _converted;
+    // the last byte is held back from the converter so that the zero stored
+    // by memset always terminates the result; length() and the astring
+    // conversion both depend on that terminator.
+    _result = ConvertUTF16toUTF8(&temp_in, temp_in + _orig_length, &temp_out,
+        temp_out + _new_length - 1, lenientConversion);
+  }
+
+  // same conversion as above, but for a wchar_t string.
+  transcode_to_utf8::transcode_to_utf8(const wchar_t *utf16_input)
+  : _orig_length(int(wcslen(utf16_input))),
+    _new_length(_orig_length * 2 + _orig_length / 2 + 1),
+    // this is just an estimate. it may be appropriate most of the time.
+    // whatever doesn't fit will get truncated.
+    _converted(new UTF8[_new_length > 0 ? _new_length : 1])
+  {
+    _result = conversionOK;
+    if (_orig_length == 0) {
+      // no length, so only provide a blank string.
+      _converted[0] = 0;
+      return;
+    }
+    memset(_converted, 0, _new_length);
+    const UTF16 *temp_in = (const UTF16 *)utf16_input;
+    UTF8 *temp_out = _converted;
+    // the last byte is held back from the converter so that the zero stored
+    // by memset always terminates the result.
+    _result = ConvertUTF16toUTF8(&temp_in, temp_in + _orig_length, &temp_out,
+        temp_out + _new_length - 1, lenientConversion);
+  }
+
+  transcode_to_utf8::~transcode_to_utf8()
+  {
+    delete[] _converted;
+    _converted = NIL;
+  }
+
+  // number of bytes before the terminating zero in the converted buffer.
+  int transcode_to_utf8::length() const
+  {
+    return int(strlen((char *)_converted));
+  }
+
+  // hands back a copy of the converted bytes as an astring.
+  transcode_to_utf8::operator astring() const
+  {
+    return astring((char *)_converted);
+  }
 //////////////
-null_transcoder::null_transcoder(const char *utf8_input, bool make_own_copy)
-: _make_own_copy(make_own_copy),
-  _converted(make_own_copy? new UTF8[strlen(utf8_input) + 1]
-      : (const UTF8 *)utf8_input)
-{
-  if (_make_own_copy) {
-    strcpy((char *)_converted, utf8_input);
-  }
-}
-
-null_transcoder::null_transcoder(const astring &utf8_input, bool make_own_copy)
-: _make_own_copy(make_own_copy),
-  _converted(make_own_copy? new UTF8[utf8_input.length() + 1]
-      : (const UTF8 *)utf8_input.s())
-{
-  if (_make_own_copy) {
-    strcpy((char *)_converted, utf8_input.s());
-  }
-}
-
-int null_transcoder::length() const
-{ return int(strlen((char *)_converted)); }
+  // performs no conversion: either points at the caller's bytes or, when
+  // make_own_copy is true, copies them into a newly allocated buffer.
+  null_transcoder::null_transcoder(const char *utf8_input, bool make_own_copy)
+  : _make_own_copy(make_own_copy),
+    _converted(make_own_copy ? new UTF8[strlen(utf8_input) + 1] : (const UTF8 *)utf8_input)
+  {
+    if (_make_own_copy) {
+      strcpy((char *)_converted, utf8_input);
+    }
+  }
+
+  // same as above, but taking the bytes from an astring.
+  null_transcoder::null_transcoder(const astring &utf8_input, bool make_own_copy)
+  : _make_own_copy(make_own_copy),
+    _converted(
+        make_own_copy ? new UTF8[utf8_input.length() + 1] : (const UTF8 *)utf8_input.s())
+  {
+    if (_make_own_copy) {
+      strcpy((char *)_converted, utf8_input.s());
+    }
+  }
+
+  // number of bytes before the terminating zero.
+  int null_transcoder::length() const
+  {
+    return int(strlen((char *)_converted));
+  }
 #endif //_cplusplus
-
-} //namespace.
+} //namespace.
diff --git a/nucleus/library/processes/state_machine.cpp b/nucleus/library/processes/state_machine.cpp
index cb0a2d65..e2f7150d 100644
--- a/nucleus/library/processes/state_machine.cpp
+++ b/nucleus/library/processes/state_machine.cpp
@@ -75,20 +75,39 @@ struct override { int current; int next; int duration;
     : current(_current), next(_next), duration(_duration) {}
 };
-struct transition_info {
-  enum transition_type { SIMPLE, RANGE, TIMED };
-  transition_type type;
-  int next_state;
-  int low_trigger, high_trigger;
-  int time_span;
-
-  transition_info() {}  // blank.
-  transition_info(int next) : type(SIMPLE), next_state(next) {}
-  transition_info(int next, int time) : type(TIMED), next_state(next),
-      time_span(time) {}
-  transition_info(int next, int low, int high) : type(RANGE),
-      next_state(next), low_trigger(low), high_trigger(high) {}
-};
+  struct transition_info
+  {
+    enum transition_type
+    {
+      SIMPLE, RANGE, TIMED
+    };
+    transition_type type;
+    int next_state;
+    int low_trigger, high_trigger;
+    int time_span;
+
+    // blank constructor; every field still receives a defined value.
+    transition_info()
+    : type(RANGE), next_state(0), low_trigger(0), high_trigger(0), time_span(0)
+    {
+    }
+
+    transition_info(int next)  // SIMPLE transition to "next".
+    : type(SIMPLE), next_state(next),
+      low_trigger(0), high_trigger(0), time_span(0)
+    {
+    }
+
+    transition_info(int next, int time)  // TIMED transition to "next".
+    : type(TIMED), next_state(next), low_trigger(0), high_trigger(0), time_span(time)  // kept in declaration order.
+    {
+    }
+
+    transition_info(int next, int low, int high)  // RANGE transition to "next".
+    : type(RANGE), next_state(next), low_trigger(low), high_trigger(high), time_span(0)
+    {
+    }
+  };
 struct state_info {
   int state_id;  // id for this state.
diff --git a/nucleus/library/tests_structures/test_hash_table.cpp b/nucleus/library/tests_structures/test_hash_table.cpp
index e4486f60..0fc446c2 100644
--- a/nucleus/library/tests_structures/test_hash_table.cpp
+++ b/nucleus/library/tests_structures/test_hash_table.cpp
@@ -95,7 +95,9 @@ public:
   DEFINE_CLASS_NAME("test_hash_table");
   int raw_random_id();  //!< returns an unvetted random number.
-  int unused_random_id();  //!< returns an unused (so far) random number.
+
+  //! returns an unused (so far) random number.
+  int unused_random_id();
   int execute();  // the main startup for the test.
@@ -159,6 +161,7 @@ int test_hash_table::unused_random_id()
     int checking = raw_random_id();
     if (!_keys_in_use.member(checking)) return checking;  // got one.
   }  // keep going until we find unused id.
+  return -1;  // this is a failure, but we will never get here.
}
 int test_hash_table::execute()
diff --git a/nucleus/library/tests_structures/test_int_hash.cpp b/nucleus/library/tests_structures/test_int_hash.cpp
index 72193eeb..7fb8c17d 100644
--- a/nucleus/library/tests_structures/test_int_hash.cpp
+++ b/nucleus/library/tests_structures/test_int_hash.cpp
@@ -102,7 +102,7 @@ public:
   data_shuttle()
   : snacky_string(string_manipulation::make_random_name()),
-    chunk(chao.inclusive(100, 10000)) {}
+    chunk(chao.inclusive(100, 10000)), food_bar(0), hungry(false) {}
 };
 //////////////
diff --git a/nucleus/library/textual/string_manipulation.cpp b/nucleus/library/textual/string_manipulation.cpp
index 6e21691a..a0febcaa 100644
--- a/nucleus/library/textual/string_manipulation.cpp
+++ b/nucleus/library/textual/string_manipulation.cpp
@@ -190,7 +190,7 @@ void string_manipulation::split_lines(const astring &input_in, astring &output,
         break_line = true;
         just_had_break = true;
         put_accum_before_break = true;
-        // intentional fall-through.
+        // intentional fall-through, so no break.
       case '\t': case ' ':
         invisible = true;
         next_break--;  // don't include it in what's printed.
diff --git a/nucleus/library/timely/stopwatch.cpp b/nucleus/library/timely/stopwatch.cpp
index d5661cbc..aeff6623 100644
--- a/nucleus/library/timely/stopwatch.cpp
+++ b/nucleus/library/timely/stopwatch.cpp
@@ -89,7 +89,7 @@ int stopwatch::common_measure()
       // stop stopwatch, restart afterwards.
       halt();
       restart = true;
-      // intentional fall through to default.
+      // intentional fall through to default, so no break.
     default:
       // set the return value to the accumulated time.
       to_return = _total_so_far;
diff --git a/nucleus/tools/dependency_tool/ifparser.cpp b/nucleus/tools/dependency_tool/ifparser.cpp
index b4de6824..545e0af7 100644
--- a/nucleus/tools/dependency_tool/ifparser.cpp
+++ b/nucleus/tools/dependency_tool/ifparser.cpp
@@ -171,6 +171,7 @@ static const char *parse_value(IfParser *g, const char *cp, int *valp)
       *valp = (*(g->funcs.eval_defined)) (g, var, int(cp - var));
       return cp + paren;  /* skip the right paren */
     }
+    break;
     /* fall out */
   }
diff --git a/nucleus/tools/dependency_tool/makedep.cpp b/nucleus/tools/dependency_tool/makedep.cpp
index 0a3844b8..a2475e25 100644
--- a/nucleus/tools/dependency_tool/makedep.cpp
+++ b/nucleus/tools/dependency_tool/makedep.cpp
@@ -404,6 +404,7 @@ int main(int argc, char **argv)
       if (endmarker) break;
       /* fatalerr("unknown opt = %s\n", argv[0]); */
       warning("ignoring option %s\n", argv[0]);
+      break;
     }
   }
diff --git a/nucleus/tools/dependency_tool/parse.cpp b/nucleus/tools/dependency_tool/parse.cpp
index 6bb46237..bc8537a8 100644
--- a/nucleus/tools/dependency_tool/parse.cpp
+++ b/nucleus/tools/dependency_tool/parse.cpp
@@ -109,9 +109,12 @@ int find_includes(struct filepointer *filep, inclist *file, inclist *file_red, i
     case ELIF:
       if (!recursion)
         gobble(filep, file, file_red);
+      //hmmm: is this right?  ELIF runs on into the ENDIF case below, which returns the directive type when recursing -- TODO confirm this is intended.
+      /* no break */
     case ENDIF:
       if (recursion)
         return(type);
+      break;
     case DEFINE:
       define(line, file);
       break;
@@ -287,6 +290,7 @@ int deftype(register char *line, register struct filepointer *filep,
   case IFNDEF:
     debug(0,("%s, line %d: #%s %s\n",
       file->i_file, filep->f_line, directives[ret], p));
+    //hmmm: no break here either?  presumably intended, since the UNDEF case below separates out the symbol name -- TODO confirm.
   case UNDEF:
     /*
      * separate the name of a single symbol.
diff --git a/nucleus/tools/dependency_tool/pr.cpp b/nucleus/tools/dependency_tool/pr.cpp
index e1bdf7b3..6c414ef7 100644
--- a/nucleus/tools/dependency_tool/pr.cpp
+++ b/nucleus/tools/dependency_tool/pr.cpp
@@ -33,7 +33,7 @@ in this Software without prior written authorization from the X Consortium.
#include
-extern struct inclist inc_list[ MAXFILES ], *inclistp;
+//extern struct inclist inc_list[ MAXFILES ], *inclistp;
 extern char *objprefix;
 extern char *objsuffix;
 extern int width;
diff --git a/scripts/core/common.alias b/scripts/core/common.alias
index 860852b4..44cf303c 100644
--- a/scripts/core/common.alias
+++ b/scripts/core/common.alias
@@ -78,6 +78,7 @@ alias up='cd ..'
 # extended aliases for meta-operations.
 alias dvd_rip='vobcopy -m'
+alias blu_rip='echo "what would this command be?"'
 alias mplayer='\mplayer -ao arts'
 alias play='bash $FEISTY_MEOW_SCRIPTS/multimedia/sound_play.sh'
diff --git a/scripts/pictures/shrink_pics.sh b/scripts/pictures/shrink_pics.sh
new file mode 100644
index 00000000..d4b442d5
--- /dev/null
+++ b/scripts/pictures/shrink_pics.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# picture shrinker, thanks to dang.
+# parm 1 is the percentage given to "mogrify -resize"; every later parm is a
+# picture file that is handed to mogrify.
+
+if [ $# -lt 2 ]; then
+  echo "This script needs at least 2 parameters: parm 1 is the reduction percentage"
+  echo "to use when shrinking pictures, and parm 2 (and 3, 4, etc) is a filename"
+  echo "to shrink."
+  exit 1
+fi
+
+percentage="$1"; shift
+
+# "$@" keeps each filename whole, even with spaces or wildcard characters.
+for i in "$@"; do
+  mogrify -resize "${percentage}%" "$i"
+done
+
diff --git a/scripts/rip_burn/blu_burn.sh b/scripts/rip_burn/blu_burn.sh
new file mode 100644
index 00000000..26f071b5
--- /dev/null
+++ b/scripts/rip_burn/blu_burn.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# burns an existing ISO file onto a disc with growisofs.
+# parm 1 is the ISO file to burn; parm 2 is the block device of the burner.
+
+function show_usage()
+{
+  echo "This script needs two parameters, (1) an ISO file that provides the"
+  echo "data for the blu-ray and (2) the device to use for burning, e.g."
+  echo " $(basename "$0") ~/grunge.iso /dev/sr1"
+}
+
+iso_name="$1"; shift
+device_name="$1"; shift
+
+if [ -z "$iso_name" ] || [ -z "$device_name" ]; then
+  show_usage
+  exit 3
+fi
+
+if [ ! -f "$iso_name" ]; then
+  echo -e "The ISO file must already exist.\n"
+  show_usage
+  exit 3
+fi
+
+if [ ! -b "$device_name" ]; then
+  echo -e "The device name provided must exist and be block-special type.\n"
+  show_usage
+  exit 3
+fi
+
+# quoted so that names containing spaces reach growisofs as one argument.
+growisofs -speed=2 -dvd-compat -Z "${device_name}=${iso_name}"
+
diff --git a/scripts/rip_burn/blu_image.sh b/scripts/rip_burn/blu_image.sh
new file mode 100644
index 00000000..8f22f2ed
--- /dev/null
+++ b/scripts/rip_burn/blu_image.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# builds an ISO file from a folder with genisoimage.
+# parm 1 is the ISO file to create (it must not exist yet); parm 2 is the
+# folder whose contents go into the image.
+
+function show_usage()
+{
+  echo "This script needs two parameters, (1) an ISO file to create, and"
+  echo "(2) a folder to use as the blu-ray data for the ISO. For example,"
+  echo " $(basename "$0") ~/grunge.iso ~/dvdimages/grungebandpro"
+}
+
+iso_name="$1"; shift
+folder_name="$1"; shift
+
+if [ -z "$iso_name" ] || [ -z "$folder_name" ]; then
+  show_usage
+  exit 3
+fi
+
+if [ -f "$iso_name" ]; then
+  echo -e "The ISO file must not already exist.\n"
+  show_usage
+  exit 3
+fi
+
+if [ ! -d "$folder_name" ]; then
+  echo -e "The provided folder name must exist.\n"
+  show_usage
+  exit 3
+fi
+
+genisoimage -r -J -o "$iso_name" "$folder_name"
+
-- 
2.34.1