core/library/textual/byte_formatter.cpp

   1 /*****************************************************************************\
   2 *                                                                             *
   3 *  Name   : byte_formatter                                                    *
   4 *  Author : Chris Koeritz                                                     *
   5 *                                                                             *
   6 *******************************************************************************
   7 * Copyright (c) 1992-$now By Author.  This program is free software; you can  *
   8 * redistribute it and/or modify it under the terms of the GNU General Public  *
   9 * License as published by the Free Software Foundation; either version 2 of   *
  10 * the License or (at your option) any later version.  This is online at:      *
  11 *     http://www.fsf.org/copyleft/gpl.html                                    *
  12 * Please send any updates to: fred@gruntose.com                               *
  13 \*****************************************************************************/
  14
  15 #include "byte_formatter.h"
  16 #include "parser_bits.h"
  17 #include "string_manipulation.h"
  18
  19 #include <basis/functions.h>
  20 #include <structures/bit_vector.h>
  21 #include <structures/string_array.h>
  22
  23 //#define DEBUG_BYTE_FORMAT
  24   // uncomment for noisier version.
  25
  26 #undef LOG
  27 #ifdef DEBUG_BYTE_FORMAT
  28   #define LOG(s) printf("%s\n", astring(s).s())
  29 #else
  30   #define LOG(s) {}
  31 #endif
  32
  33 #define LINE_SIZE 80
  34
  35 using namespace basis;
  36 using namespace structures;
  37
  38 namespace textual {
  39
  40 void byte_formatter::print_char(abyte to_print, astring &out, char replace)
  41 {
  42   int temp = to_print % 128;
  43   if (!parser_bits::is_printable_ascii(to_print)) out += replace;
  44   else out += char(temp);
  45 }
  46
  47 void byte_formatter::print_chars(const abyte *to_print, int len, astring &out, char replace)
  48 {
  49   for (int i = 0; i < len; i++)
  50     print_char(to_print[i], out, replace);
  51 }
  52
  53 void byte_formatter::make_eight(basis::un_int num, astring &out)
  54 {
  55   basis::un_int thresh = 0x10000000;
  56   while (thresh >= 0x10) {
  57     if (num < thresh)
  58       out += '0';
  59     thresh >>= 4;  // zap a nibble.
  60   }
  61 }
  62
  63 astring byte_formatter::text_dump(const abyte *location, basis::un_int length, basis::un_int label,
  64     const char *eol)
  65 {
  66   astring to_return;
  67   text_dump(to_return, location, length, label, eol);
  68   return to_return;
  69 }
  70
  71 void byte_formatter::text_dump(astring &output, const byte_array &to_dump, basis::un_int label,
  72     const char *eol)
  73 {
  74   text_dump(output, to_dump.observe(), to_dump.length(), label, eol);
  75 }
  76
  77 astring byte_formatter::text_dump(const byte_array &to_dump, basis::un_int label, const char *eol)
  78 {
  79   astring output;
  80   text_dump(output, to_dump.observe(), to_dump.length(), label, eol);
  81   return output;
  82 }
  83
  84 // this is the real version of text_dump.  all the others use it.
  85 void byte_formatter::text_dump(astring &to_return, const abyte *location, basis::un_int length,
  86     basis::un_int label, const char *eol)
  87 {
  88   to_return = "";
  89   int entry_size = 4;
  90   int preamble = 14;
  91
  92   basis::un_int entries_per_line = (LINE_SIZE - preamble) / entry_size;
  93
  94   for (basis::un_int i = 0; i < length; i += entries_per_line) {
  95     make_eight(i + label, to_return);
  96     to_return += astring(astring::SPRINTF, "%x", i + label) + astring(" | ");
  97     for (basis::un_int j = 0; j < entries_per_line; j++) {
  98       if (i + j >= length) {
  99         // if at the end of the loop, just print spaces.
 100         to_return += "   ";
 101       } else {
 102         int ord_of_current_char = *(location + i + j) & 0xFF;
 103         if (ord_of_current_char < 0x10) to_return += '0';
 104         to_return += astring(astring::SPRINTF, "%x", int(ord_of_current_char));
 105         to_return += ' ';
 106       }
 107     }
 108
 109     to_return += "| ";
 110     for (basis::un_int k = i; k < i + entries_per_line; k++) {
 111       if (k >= length) to_return += ' ';
 112         // if past the end of the block, just add spaces.
 113       else print_char(*(location + k), to_return);
 114     }
 115     to_return += astring(" |") + eol;
 116   }
 117 }
 118
 119 void byte_formatter::parse_dump(const astring &dumped_form, byte_array &bytes_found)
 120 {
 121   bytes_found.reset();
 122   string_array lines_found;
 123   // iterate over the string and break it up into lines.
 124   for (int i = 0; i < dumped_form.length(); i++) {
 125     int indy = dumped_form.find('\n', i);
 126 //hmmm: not platform invariant.  what about '\r' if we see it?
 127
 128     if (negative(indy)) {
 129       // no more lines found.
 130       if (i < dumped_form.length() - 1) {
 131         // grab the last bit as a line.
 132         lines_found += dumped_form.substring(i, dumped_form.length() - 1);
 133       }
 134       break;
 135     }
 136     // found a normal line ending, so drop everything from the current
 137     // position up to the ending into the list of strings.
 138     lines_found += dumped_form.substring(i, indy - 1);
 139     i = indy + 1;  // jump to next potential line.
 140   }
 141   // now process the lines that we've found.
 142   for (int j = 0; j < lines_found.length(); j++) {
 143     // first step is to find the pipe character that brackets the actual
 144     // data.  we ignore the "address" located before the pipe.
 145     astring &s = lines_found[j];
 146     int bar_one = s.find('|', 0);
 147     if (negative(bar_one)) continue;  // skip this one; it's malformed.
 148     // now we look for the second pipe that comes before the text form of
 149     // the data.  we don't care about the text or anything after.
 150     int bar_two = s.find('|', bar_one + 1);
 151     if (negative(bar_two)) continue;  // skip for same reason.
 152     astring s2 = s.substring(bar_one + 1, bar_two - 1);
 153     byte_array this_part;
 154     string_to_bytes(s2, this_part);
 155     bytes_found += this_part;
 156   }
 157 }
 158
 159 //////////////
 160
 161 void byte_formatter::bytes_to_string(const abyte *to_convert, int length, astring &as_string,
 162     bool space_delimited)
 163 {
 164   if (!to_convert || !length) return;  // nothing to do.
 165   if (negative(length)) return;  // bunk.
 166   as_string = "";  // reset the output parameter.
 167
 168   // the pattern is used for printing the bytes and considering the delimiter.
 169   astring pattern("%02x");
 170   if (space_delimited) pattern += " ";
 171
 172   // now zip through the array and dump it into the string.
 173   for (int i = 0; i < length; i++)
 174     as_string += astring(astring::SPRINTF, pattern.s(), to_convert[i]);
 175 }
 176
 177 // returns true if the character is within the valid ranges of hexadecimal
 178 // nibbles (as text).
 179 bool byte_formatter::in_hex_range(char to_check)
 180 //hmmm: move this to parser bits.
 181 {
 182   return ( (to_check <= '9') && (to_check >= '0') )
 183       || ( (to_check <= 'f') && (to_check >= 'a') )
 184       || ( (to_check <= 'F') && (to_check >= 'A') );
 185 }
 186
 187 void byte_formatter::string_to_bytes(const char *to_convert, byte_array &as_array)
 188 {
 189   as_array.reset();  // clear the array.
 190   const int len = int(strlen(to_convert));
 191
 192   // the parser uses a simple state machine for processing the string.
 193   enum states { FINDING_HEX, IGNORING_JUNK };
 194   states state = IGNORING_JUNK;
 195
 196   int digits = 0;  // the number of digits we've currently found.
 197   int accumulator = 0;  // the current hex duo.
 198
 199   // loop through the string.
 200   for (int i = 0; i < len; i++) {
 201     switch (state) {
 202       case IGNORING_JUNK: {
 203         if (in_hex_range(to_convert[i])) {
 204           i--;  // skip back to where we were before now.
 205           state = FINDING_HEX;
 206           continue;  // jump to the other state.
 207         }
 208         // otherwise, we could care less what the character is.
 209         break;
 210       }
 211       case FINDING_HEX: {
 212         if (digits >= 2) {
 213           // we have finished a hex byte.
 214           as_array += abyte(accumulator);
 215           accumulator = 0;
 216           digits = 0;
 217           i--;  // skip back for the byte we haven't eaten yet.
 218           state = IGNORING_JUNK;  // jump to other state for a new item.
 219           continue;
 220         }
 221         // we really think this is a digit here and we're not through with
 222         // accumulating them.
 223         accumulator <<= 4;
 224         digits++;
 225         accumulator += string_manipulation::char_to_hex(to_convert[i]);
 226
 227         // now we sneakily check the next character.
 228         if (!in_hex_range(to_convert[i+1])) {
 229           // we now know we should not be in this state for long.
 230           if (digits) {
 231             // there's still some undigested stuff.
 232             digits = 2;  // fake a finished byte.
 233             continue;  // keep going, but eat the character we were at.
 234           }
 235           // well, there's nothing lost if we just jump to that state.
 236           state = IGNORING_JUNK;
 237           continue;
 238         }
 239         break;
 240       }
 241     }
 242   }
 243   if (digits) {
 244     // snag the last unfinished bit.
 245     as_array += abyte(accumulator);
 246   }
 247 }
 248
 249 void byte_formatter::bytes_to_string(const byte_array &to_convert, astring &as_string,
 250     bool space_delimited)
 251 {
 252   bytes_to_string(to_convert.observe(), to_convert.length(), as_string,
 253       space_delimited);
 254 }
 255
 256 void byte_formatter::string_to_bytes(const astring &to_convert, byte_array &as_array)
 257 { string_to_bytes(to_convert.s(), as_array); }
 258
 259 void byte_formatter::bytes_to_shifted_string(const byte_array &to_convert, astring &as_string)
 260 {
 261 #ifdef DEBUG_BYTE_FORMAT
 262   FUNCDEF("bytes_to_shifted_string");
 263 #endif
 264   bit_vector splitter(8 * to_convert.length(), to_convert.observe());
 265   int i;  // track our current position.
 266   for (i = 0; i < splitter.bits(); i += 7) {
 267     abyte curr = 1;  // start with a bit set already.
 268     for (int j = i; j < i + 7; j++) {
 269       curr <<= 1;  // move to the left.
 270       if (j < splitter.bits())
 271         curr |= abyte(splitter.on(j));  // or in the current position.
 272     }
 273     as_string += char(curr);
 274   }
 275 #ifdef DEBUG_BYTE_FORMAT
 276   LOG(a_sprintf("%d bytes comes out as %d char string.",
 277       to_convert.length(), as_string.length()).s());
 278 #endif
 279 }
 280
 281 void byte_formatter::shifted_string_to_bytes(const astring &to_convert, byte_array &as_array)
 282 {
 283 #ifdef DEBUG_BYTE_FORMAT
 284   FUNCDEF("shifted_string_to_bytes");
 285 #endif
 286   bit_vector accumulator;
 287
 288   for (int i = 0; i < to_convert.length(); i++) {
 289     abyte current = abyte(to_convert[i]) & 0x7F;
 290       // get the current bits but remove the faux sign bit.
 291     accumulator.resize(accumulator.bits() + 7);
 292     // now shift off the individual pieces.
 293     for (int j = 0; j < 7; j++) {
 294       // get current bit's value.
 295       current <<= 1;  // shift it up.
 296       abyte set_here = current & 0x80;  // test the highest order bit.
 297       // now flip that bit on or off based on what we saw.
 298       accumulator.set_bit(i * 7 + j, bool(set_here));
 299     }
 300   }
 301
 302   int remainder = accumulator.bits() % 8;
 303   accumulator.resize(accumulator.bits() - remainder);
 304     // chop off any extraneous bits that are due to our shifting.
 305
 306 #ifdef DEBUG_BYTE_FORMAT
 307   // there should be no remainder.  and the number of bits should be a multiple
 308   // of eight now.
 309   if (accumulator.bits() % 8)
 310     deadly_error("byte_formatter", func, "number of bits is erroneous.");
 311 #endif
 312
 313   const byte_array &accumref = accumulator;
 314   for (int q = 0; q < accumulator.bits() / 8; q++)
 315     as_array += accumref[q];
 316
 317 #ifdef DEBUG_BYTE_FORMAT
 318   LOG(a_sprintf("%d chars comes out as %d bytes.",
 319       to_convert.length(), as_array.length()).s());
 320 #endif
 321 }
 322
 323 } // namespace
 324