feisty meow concerns codebase 2.140
list_parsing.cpp
Go to the documentation of this file.
1/*****************************************************************************\
2* *
3* Name : list_parsing *
4* Author : Chris Koeritz *
5* Author : Gary Hardley *
6* *
7*******************************************************************************
8* Copyright (c) 2002-$now By Author. This program is free software; you can *
9* redistribute it and/or modify it under the terms of the GNU General Public *
10* License as published by the Free Software Foundation; either version 2 of *
11* the License or (at your option) any later version. This is online at: *
12* http://www.fsf.org/copyleft/gpl.html *
13* Please send any updates to: fred@gruntose.com *
14\*****************************************************************************/
15
16#include "list_parsing.h"
17#include "parser_bits.h"
18
19#include <basis/astring.h>
20#include <structures/set.h>
22
23#include <ctype.h>
24#include <stdio.h>
25
26using namespace basis;
27using namespace structures;
28
29namespace textual {
30
// prints "to_print" to standard output, prefixed by the class name and by the
// current function name; "func" has to be in scope wherever this is used.
#undef LOG
#define LOG(to_print) \
  printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
33
34list_parsing::~list_parsing() {} // needed since we use the class_name macro.
35
36// by Gary Hardley.
37bool list_parsing::get_ids_from_string(const astring &to_parse, int_set &identifiers)
38{
39 identifiers.clear(); // clear existing ids, if any.
40 int_array found;
41 bool ret = get_ids_from_string(to_parse, found);
42 if (!ret) return false;
43 for (int i = 0; i < found.length(); i++) identifiers.add(found[i]);
44 return true;
45}
46
47// by Gary Hardley.
49 int_array &identifiers)
50{
51 identifiers.reset(); // clear existing ids, if any.
52 if (!to_parse) return false;
53 // if an empty string is passed, return an empty set.
54
55 int last_id = -1;
56 int tmp_id;
57 bool done = false;
58 char last_separator = ' ';
59
60 int index = 0;
61 while (!done && (index < to_parse.length())) {
62 tmp_id = 0;
63 bool got_digit = false;
64 while ( (to_parse[index] != ',') && (to_parse[index] != '-')
65 && (to_parse[index] != ' ') && (index < to_parse.length()) ) {
66 if (!isdigit(to_parse[index])) return false;
67 tmp_id *= 10;
68 tmp_id += int(to_parse[index++]) - 0x30;
69 got_digit = true;
70 }
71
72 if (got_digit) {
73 if (tmp_id > MAXINT32) return false;
74
75 if (last_id == -1) {
76 last_id = tmp_id;
77 identifiers += last_id;
78 } else {
79 // if the last separator was a dash, this is a range
80 if (last_separator == '-') {
81 if (tmp_id >= last_id) {
82 for (int i = last_id + 1; i <= tmp_id; i++)
83 identifiers += i;
84 }
85 else {
86 for (int i = tmp_id; i < last_id; i++)
87 identifiers += i;
88 }
89 last_id = 0;
90 last_separator = ' ';
91 } else {
92 last_id = tmp_id;
93 identifiers += last_id;
94 }
95 }
96 } else {
97 // did not read an address, to_parse[index] must be a non-digit.
98 if ( (to_parse[index] != ' ') && (to_parse[index] != '-')
99 && (to_parse[index] != ',') ) return false;
100 last_separator = to_parse[index++];
101 }
102 }
103 return true;
104}
105
106//by chris koeritz.
108{
109 astring to_return;
110 for (int i = 0; i < ids.length(); i++) {
111 to_return += a_sprintf("%d", ids[i]);
112 if (i < ids.length() - 1) {
113 to_return += separator;
114 to_return += " ";
115 }
116 }
117 return to_return;
118}
119
120//by chris koeritz.
122{
123 astring to_return;
124 for (int i = 0; i < ids.length(); i++) {
125 to_return += a_sprintf("%d", ids[i]);
126 if (i < ids.length() - 1) {
127 to_return += separator;
128 to_return += " ";
129 }
130 }
131 return to_return;
132}
133
134// ensures that quotes inside the string "to_emit" are escaped.
136{
137 astring to_return('\0', 256); // avoid reallocations with large pre-alloc.
138 to_return = ""; // reset to get blank string but keep pre-alloc.
139 for (int i = 0; i < to_emit.length(); i++) {
140 char next_char = to_emit[i];
141 if ( (next_char == '"') || (next_char == '\\') )
142 to_return += "\\"; // add the escape before quote or backslash.
143 to_return += astring(next_char, 1);
144 }
145 return to_return;
146}
147
149{
150 target = astring::empty_string();
151 for (int i = 0; i < to_csv.symbols(); i++) {
152 target += astring("\"") + emit_quoted_chunk(to_csv.name(i))
153 + "=" + emit_quoted_chunk(to_csv[i]) + "\"";
154 if (i < to_csv.symbols() - 1) target += ",";
155 }
156}
157
159{
160 target = astring::empty_string();
161 for (int i = 0; i < to_csv.length(); i++) {
162 target += astring("\"") + emit_quoted_chunk(to_csv[i]) + "\"";
163 if (i < to_csv.length() - 1) target += ",";
164 }
165}
166
167// we do handle escaped quotes for csv parsing, so check for backslash.
168// and since we handle escaped quotes, we also have to handle escaping the
169// backslash (otherwise a quoted item with a backslash as the last character
170// cannot be handled appropriately, because it will be interpreted as an
171// escaped quote instead). no other escapes are implemented right now.
// expects "to_parse", "i" and "accumulator" to be in scope, and must be used
// directly inside the loop that walks "to_parse", since it issues a continue.
// a backslash that is the very last character is not treated as an escape, so
// the character after the end of the string is never examined.
#define handle_escapes \
  if ( (to_parse[i] == '\\') && (i + 1 < to_parse.length()) ) { \
    if ( (to_parse[i + 1] == '"') || (to_parse[i + 1] == '\\') ) { \
      i++; /* step over the backslash to the escaped character. */ \
      accumulator += to_parse[i]; \
      continue;  /* skip normal handling in sequel. */ \
    } \
  }
180
const int ARRAY_PREFILL_AMOUNT = 7;
  // how many blank slots get appended whenever "fields" runs short.

// stores "new_line" into the next slot of "fields".  expects "fields" and
// "storage_slot" to be in scope; "storage_slot" is advanced before the store.
#define ADD_LINE_TO_FIELDS(new_line) { \
  /* move on to the place where this item will be stored. */ \
  ++storage_slot; \
  /* keep room for the next slot and then some. */ \
  if (storage_slot + 2 > fields.length()) \
    fields.insert(fields.length(), ARRAY_PREFILL_AMOUNT); \
  fields[storage_slot] = new_line; \
}
193
194//hmmm: parameterize what is meant by a quote. maybe comma too.
195//by chris koeritz.
196bool list_parsing::parse_csv_line(const astring &to_parse, string_array &fields)
197{
198 FUNCDEF("parse_csv_line");
199 // the current field we're chowing. we puff it out to start with to
200 // avoid paying for expanding its memory later.
201 astring accumulator(' ', 256);
202 accumulator = astring::empty_string();
203
204 // the state machine goes through these states until the entire string
205 // is consumed.
206 enum states { seeking_quote, eating_string, seeking_comma };
207 states state = seeking_quote;
208
209 bool no_second_quote = false; // true if we started without a quote.
210 bool just_saw_comma = false; // true if seeking comma was the last state.
211
212 int storage_slot = -1;
213
214 for (int i = 0; i < to_parse.length(); i++) {
215 switch (state) {
216 case seeking_quote:
217 if (parser_bits::white_space(to_parse[i])) continue;
218 if (to_parse[i] == ',') {
219 // a missing quoted string counts as an empty string.
220 ADD_LINE_TO_FIELDS(astring::empty_string());
221 just_saw_comma = true;
222 continue;
223 }
224 just_saw_comma = false; // cancel that state.
225 if (to_parse[i] != '"') {
226 // short circuit the need for a quote.
227 accumulator += to_parse[i];
228 no_second_quote = true;
229 }
230 state = eating_string;
231 break;
232 case eating_string:
233 just_saw_comma = false; // no longer true.
234 if (no_second_quote && (to_parse[i] != ',') ) {
236 accumulator += to_parse[i];
237 } else if (!no_second_quote && (to_parse[i] != '"') ) {
239 accumulator += to_parse[i];
240 } else {
241 // we found the closing quote (or comma). add the string.
242 if (no_second_quote) {
243 state = seeking_quote;
244 just_saw_comma = true;
245 } else state = seeking_comma;
246 ADD_LINE_TO_FIELDS(accumulator)
247 accumulator = astring::empty_string();
248 no_second_quote = false;
249 }
250 break;
251 case seeking_comma:
252 if (parser_bits::white_space(to_parse[i])) continue;
253 if (to_parse[i] == ',') {
254 // we got what we wanted.
255 state = seeking_quote;
256 just_saw_comma = true;
257 continue;
258 }
259 // well, there was no comma. that's an error.
260 return false;
261 break;
262 default: {
263 LOG("erroneous state reached during csv parsing");
264 break;
265 }
266 }
267 }
268 if ( (state == eating_string) && (accumulator.length()) )
269 ADD_LINE_TO_FIELDS(accumulator)
270 else if (just_saw_comma)
272 if (fields.length() > storage_slot + 1)
273 fields.zap(storage_slot + 1, fields.last());
274 return true;
275}
276
277} //namespace.
278
279
a_sprintf is a specialization of astring that provides printf style support.
Definition astring.h:440
void reset(int number=0, const contents *initial_contents=NULL_POINTER)
Resizes this array and sets the contents from an array of contents.
Definition array.h:349
int length() const
Returns the current reported length of the allocated C array.
Definition array.h:115
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
static const astring & empty_string()
useful wherever empty strings are needed, e.g., function defaults.
Definition astring.cpp:128
int length() const
Returns the current length of the string.
Definition astring.cpp:132
A simple object that wraps a templated array of ints.
Definition array.h:275
A simple object that wraps a templated set of ints.
Definition set.h:156
bool add(const contents &to_add)
Adds a new element "to_add" to the set.
Definition set.h:232
void clear()
Empties out this set.
Definition set.h:55
An array of strings with some additional helpful methods.
Provides a symbol_table that holds strings as the content.
const basis::astring & name(int index) const
returns the name held at the "index".
int symbols() const
returns the number of symbols listed in the table.
static basis::astring put_ids_in_string(const structures::int_set &ids, char separator=',')
returns a string containing a "separator" separated list of ids.
static basis::astring emit_quoted_chunk(const basis::astring &to_emit)
ensures that quotes inside the string "to_emit" are escaped.
static void create_csv_line(const structures::string_array &to_csv, basis::astring &target)
static bool get_ids_from_string(const basis::astring &string, structures::int_set &ids)
returns true if a set of unique ids can be extracted from "string".
#define MAXINT32
Maximum 32-bit integer value.
Definition definitions.h:75
#define ADD_LINE_TO_FIELDS(new_line)
#define LOG(to_print)
#define handle_escapes
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
A dynamic container class that holds any kind of object via pointers.
Definition amorph.h:55