feisty meow concerns codebase 2.140
byte_formatter.cpp
Go to the documentation of this file.
1/*****************************************************************************\
2* *
3* Name : byte_formatter *
4* Author : Chris Koeritz *
5* *
6*******************************************************************************
7* Copyright (c) 1992-$now By Author. This program is free software; you can *
8* redistribute it and/or modify it under the terms of the GNU General Public *
9* License as published by the Free Software Foundation; either version 2 of *
10* the License or (at your option) any later version. This is online at: *
11* http://www.fsf.org/copyleft/gpl.html *
12* Please send any updates to: fred@gruntose.com *
13\*****************************************************************************/
14
15#include "byte_formatter.h"
16#include "parser_bits.h"
17#include "string_manipulation.h"
18
19#include <basis/functions.h>
22
23//#define DEBUG_BYTE_FORMAT
24 // uncomment for noisier version.
25
26#undef LOG
27#ifdef DEBUG_BYTE_FORMAT
28 #define LOG(s) printf("%s\n", astring(s).s())
29#else
30 #define LOG(s) {}
31#endif
32
33#define LINE_SIZE 80
34
35using namespace basis;
36using namespace structures;
37
38namespace textual {
39
40void byte_formatter::print_char(abyte to_print, astring &out, char replace)
41{
42 int temp = to_print % 128;
43 if (!parser_bits::is_printable_ascii(to_print)) out += replace;
44 else out += char(temp);
45}
46
47void byte_formatter::print_chars(const abyte *to_print, int len, astring &out, char replace)
48{
49 for (int i = 0; i < len; i++)
50 print_char(to_print[i], out, replace);
51}
52
54{
55 basis::un_int thresh = 0x10000000;
56 while (thresh >= 0x10) {
57 if (num < thresh)
58 out += '0';
59 thresh >>= 4; // zap a nibble.
60 }
61}
62
64 const char *eol)
65{
66 astring to_return;
67 text_dump(to_return, location, length, label, eol);
68 return to_return;
69}
70
71void byte_formatter::text_dump(astring &output, const byte_array &to_dump, basis::un_int label,
72 const char *eol)
73{
74 text_dump(output, to_dump.observe(), to_dump.length(), label, eol);
75}
76
77astring byte_formatter::text_dump(const byte_array &to_dump, basis::un_int label, const char *eol)
78{
79 astring output;
80 text_dump(output, to_dump.observe(), to_dump.length(), label, eol);
81 return output;
82}
83
84// this is the real version of text_dump. all the others use it.
85void byte_formatter::text_dump(astring &to_return, const abyte *location, basis::un_int length,
86 basis::un_int label, const char *eol)
87{
88 to_return = "";
89 int entry_size = 4;
90 int preamble = 14;
91
92 basis::un_int entries_per_line = (LINE_SIZE - preamble) / entry_size;
93
94 for (basis::un_int i = 0; i < length; i += entries_per_line) {
95 make_eight(i + label, to_return);
96 to_return += astring(astring::SPRINTF, "%x", i + label) + astring(" | ");
97 for (basis::un_int j = 0; j < entries_per_line; j++) {
98 if (i + j >= length) {
99 // if at the end of the loop, just print spaces.
100 to_return += " ";
101 } else {
102 int ord_of_current_char = *(location + i + j) & 0xFF;
103 if (ord_of_current_char < 0x10) to_return += '0';
104 to_return += astring(astring::SPRINTF, "%x", int(ord_of_current_char));
105 to_return += ' ';
106 }
107 }
108
109 to_return += "| ";
110 for (basis::un_int k = i; k < i + entries_per_line; k++) {
111 if (k >= length) to_return += ' ';
112 // if past the end of the block, just add spaces.
113 else print_char(*(location + k), to_return);
114 }
115 to_return += astring(" |") + eol;
116 }
117}
118
119void byte_formatter::parse_dump(const astring &dumped_form, byte_array &bytes_found)
120{
121 bytes_found.reset();
122 string_array lines_found;
123 // iterate over the string and break it up into lines.
124 for (int i = 0; i < dumped_form.length(); i++) {
125 int indy = dumped_form.find('\n', i);
126//hmmm: not platform invariant. what about '\r' if we see it?
127
128 if (negative(indy)) {
129 // no more lines found.
130 if (i < dumped_form.length() - 1) {
131 // grab the last bit as a line.
132 lines_found += dumped_form.substring(i, dumped_form.length() - 1);
133 }
134 break;
135 }
136 // found a normal line ending, so drop everything from the current
137 // position up to the ending into the list of strings.
138 lines_found += dumped_form.substring(i, indy - 1);
139 i = indy + 1; // jump to next potential line.
140 }
141 // now process the lines that we've found.
142 for (int j = 0; j < lines_found.length(); j++) {
143 // first step is to find the pipe character that brackets the actual
144 // data. we ignore the "address" located before the pipe.
145 astring &s = lines_found[j];
146 int bar_one = s.find('|', 0);
147 if (negative(bar_one)) continue; // skip this one; it's malformed.
148 // now we look for the second pipe that comes before the text form of
149 // the data. we don't care about the text or anything after.
150 int bar_two = s.find('|', bar_one + 1);
151 if (negative(bar_two)) continue; // skip for same reason.
152 astring s2 = s.substring(bar_one + 1, bar_two - 1);
153 byte_array this_part;
154 string_to_bytes(s2, this_part);
155 bytes_found += this_part;
156 }
157}
158
160
161void byte_formatter::bytes_to_string(const abyte *to_convert, int length, astring &as_string,
162 bool space_delimited)
163{
164 if (!to_convert || !length) return; // nothing to do.
165 if (negative(length)) return; // bunk.
166 as_string = ""; // reset the output parameter.
167
168 // the pattern is used for printing the bytes and considering the delimiter.
169 astring pattern("%02x");
170 if (space_delimited) pattern += " ";
171
172 // now zip through the array and dump it into the string.
173 for (int i = 0; i < length; i++)
174 as_string += astring(astring::SPRINTF, pattern.s(), to_convert[i]);
175}
176
177// returns true if the character is within the valid ranges of hexadecimal
178// nibbles (as text).
180//hmmm: move this to parser bits.
181{
182 return ( (to_check <= '9') && (to_check >= '0') )
183 || ( (to_check <= 'f') && (to_check >= 'a') )
184 || ( (to_check <= 'F') && (to_check >= 'A') );
185}
186
187void byte_formatter::string_to_bytes(const char *to_convert, byte_array &as_array)
188{
189 as_array.reset(); // clear the array.
190 const int len = int(strlen(to_convert));
191
192 // the parser uses a simple state machine for processing the string.
193 enum states { FINDING_HEX, IGNORING_JUNK };
194 states state = IGNORING_JUNK;
195
196 int digits = 0; // the number of digits we've currently found.
197 int accumulator = 0; // the current hex duo.
198
199 // loop through the string.
200 for (int i = 0; i < len; i++) {
201 switch (state) {
202 case IGNORING_JUNK: {
203 if (in_hex_range(to_convert[i])) {
204 i--; // skip back to where we were before now.
205 state = FINDING_HEX;
206 continue; // jump to the other state.
207 }
208 // otherwise, we could care less what the character is.
209 break;
210 }
211 case FINDING_HEX: {
212 if (digits >= 2) {
213 // we have finished a hex byte.
214 as_array += abyte(accumulator);
215 accumulator = 0;
216 digits = 0;
217 i--; // skip back for the byte we haven't eaten yet.
218 state = IGNORING_JUNK; // jump to other state for a new item.
219 continue;
220 }
221 // we really think this is a digit here and we're not through with
222 // accumulating them.
223 accumulator <<= 4;
224 digits++;
225 accumulator += string_manipulation::char_to_hex(to_convert[i]);
226
227 // now we sneakily check the next character.
228 if (!in_hex_range(to_convert[i+1])) {
229 // we now know we should not be in this state for long.
230 if (digits) {
231 // there's still some undigested stuff.
232 digits = 2; // fake a finished byte.
233 continue; // keep going, but eat the character we were at.
234 }
235 // well, there's nothing lost if we just jump to that state.
236 state = IGNORING_JUNK;
237 continue;
238 }
239 break;
240 }
241 }
242 }
243 if (digits) {
244 // snag the last unfinished bit.
245 as_array += abyte(accumulator);
246 }
247}
248
249void byte_formatter::bytes_to_string(const byte_array &to_convert, astring &as_string,
250 bool space_delimited)
251{
252 bytes_to_string(to_convert.observe(), to_convert.length(), as_string,
253 space_delimited);
254}
255
256void byte_formatter::string_to_bytes(const astring &to_convert, byte_array &as_array)
257{ string_to_bytes(to_convert.s(), as_array); }
258
260{
261#ifdef DEBUG_BYTE_FORMAT
262 FUNCDEF("bytes_to_shifted_string");
263#endif
264 bit_vector splitter(8 * to_convert.length(), to_convert.observe());
265 int i; // track our current position.
266 for (i = 0; i < splitter.bits(); i += 7) {
267 abyte curr = 1; // start with a bit set already.
268 for (int j = i; j < i + 7; j++) {
269 curr <<= 1; // move to the left.
270 if (j < splitter.bits())
271 curr |= abyte(splitter.on(j)); // or in the current position.
272 }
273 as_string += char(curr);
274 }
275#ifdef DEBUG_BYTE_FORMAT
276 LOG(a_sprintf("%d bytes comes out as %d char string.",
277 to_convert.length(), as_string.length()).s());
278#endif
279}
280
282{
283#ifdef DEBUG_BYTE_FORMAT
284 FUNCDEF("shifted_string_to_bytes");
285#endif
286 bit_vector accumulator;
287
288 for (int i = 0; i < to_convert.length(); i++) {
289 abyte current = abyte(to_convert[i]) & 0x7F;
290 // get the current bits but remove the faux sign bit.
291 accumulator.resize(accumulator.bits() + 7);
292 // now shift off the individual pieces.
293 for (int j = 0; j < 7; j++) {
294 // get current bit's value.
295 current <<= 1; // shift it up.
296 abyte set_here = current & 0x80; // test the highest order bit.
297 // now flip that bit on or off based on what we saw.
298 accumulator.set_bit(i * 7 + j, bool(set_here));
299 }
300 }
301
302 int remainder = accumulator.bits() % 8;
303 accumulator.resize(accumulator.bits() - remainder);
304 // chop off any extraneous bits that are due to our shifting.
305
306#ifdef DEBUG_BYTE_FORMAT
307 // there should be no remainder. and the number of bits should be a multiple
308 // of eight now.
309 if (accumulator.bits() % 8)
310 deadly_error("byte_formatter", func, "number of bits is erroneous.");
311#endif
312
313 const byte_array &accumref = accumulator;
314 for (int q = 0; q < accumulator.bits() / 8; q++)
315 as_array += accumref[q];
316
317#ifdef DEBUG_BYTE_FORMAT
318 LOG(a_sprintf("%d chars comes out as %d bytes.",
319 to_convert.length(), as_array.length()).s());
320#endif
321}
322
323} // namespace
324
#define LOG(s)
#define LINE_SIZE
a_sprintf is a specialization of astring that provides printf style support.
Definition astring.h:440
void reset(int number=0, const contents *initial_contents=NULL_POINTER)
Resizes this array and sets the contents from an array of contents.
Definition array.h:349
const contents * observe() const
Returns a pointer to the underlying C array of data.
Definition array.h:172
int length() const
Returns the current reported length of the allocated C array.
Definition array.h:115
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
const char * s() const
synonym for observe. the 's' stands for "string", if that helps.
Definition astring.h:113
bool substring(astring &target, int start, int end) const
a version that stores the substring in an existing "target" string.
Definition astring.cpp:868
int length() const
Returns the current length of the string.
Definition astring.cpp:132
int find(char to_find, int position=0, bool reverse=false) const
Locates "to_find" in "this".
Definition astring.cpp:577
A very common template for a dynamic array of bytes.
Definition byte_array.h:36
An array of bits with operations for manipulating and querying individual bits.
Definition bit_vector.h:26
bool on(int position) const
returns true if the bit at "position" is set.
int bits() const
returns the number of bits in the vector.
void set_bit(int position, bool value)
sets the bit at "position" to a particular "value".
void resize(int size)
Changes the size of the bit_vector to "size" bits.
An array of strings with some additional helpful methods.
static void shifted_string_to_bytes(const basis::astring &to_convert, basis::byte_array &as_array)
unshifts a string "to_convert" back into a byte_array.
static void print_chars(const basis::abyte *to_print, int length, basis::astring &out, char replace='_')
sends the bytes in "to_print" of "length" bytes into the string "out".
static bool in_hex_range(char to_check)
static void text_dump(basis::astring &output, const basis::abyte *location, basis::un_int length, basis::un_int label=0, const char *eol="\n")
prints out a block of memory in a human readable form.
static void bytes_to_string(const basis::byte_array &to_convert, basis::astring &as_string, bool space_delimited=true)
converts a byte_array into a string.
static void string_to_bytes(const basis::astring &to_convert, basis::byte_array &as_array)
wrangles the string "to_convert" into an equivalent byte form "as_array".
static void parse_dump(const basis::astring &dumped_form, basis::byte_array &bytes_found)
this operation performs the inverse of a text_dump.
static void make_eight(basis::un_int num, basis::astring &out)
static void bytes_to_shifted_string(const basis::byte_array &to_convert, basis::astring &as_string)
this is a special purpose converter from bytes to character strings.
static void print_char(basis::abyte to_print, basis::astring &out, char replace='_')
prints the byte "to_print" into "out" as long as "to_print" is readable.
static bool is_printable_ascii(char to_check)
returns true if "to_check" is a normally visible ASCII character.
static basis::abyte char_to_hex(char to_convert)
Converts a single character into the corresponding hex nibble.
#define deadly_error(c, f, i)
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition enhance_cpp.h:54
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
unsigned char abyte
A fairly important unit which is seldom defined...
Definition definitions.h:51
unsigned int un_int
Abbreviated name for unsigned integers.
Definition definitions.h:62
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition functions.h:43
A dynamic container class that holds any kind of object via pointers.
Definition amorph.h:55