26 using namespace basis;
// Logging helper: printf()s "<static_class_name()>::<func>: <message>" plus a newline,
// with the message obtained by converting to_print through astring.
// Needs an identifier named `func` in scope where it is used (presumably supplied by
// FUNCDEF -- confirm).
32 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
// Destructor; the body is empty.
34 list_parsing::~list_parsing() {}
// Overload that delivers the parsed ids into an int_set. It calls get_ids_from_string
// with `found`, returns false if that call fails, and otherwise add()s each element of
// `found` to `identifiers`.
// NOTE(review): the declaration of `found`, the opening brace and the final return are
// not visible in this view.
37 bool list_parsing::get_ids_from_string(
const astring &to_parse,
int_set &identifiers)
41 bool ret = get_ids_from_string(to_parse, found);
// propagate a parse failure to the caller.
42 if (!ret)
return false;
// copy every parsed id into the caller's set.
43 for (
int i = 0; i < found.
length(); i++) identifiers.
add(found[i]);
// Overload that scans `to_parse` for decimal ids separated by ',', '-' or ' '.
// From the visible lines: every character of an id must be a digit (otherwise false is
// returned), ids are appended to `identifiers`, and a preceding '-' separator leads to
// loops over the values between last_id and tmp_id.
// NOTE(review): many lines of this function (remaining parameters, local declarations,
// loop bodies, closing braces, final return) are not visible in this view.
48 bool list_parsing::get_ids_from_string(
const astring &to_parse,
// bail out when !to_parse holds (presumably an empty string -- confirm against astring).
52 if (!to_parse)
return false;
// the separator seen before the current id; starts out as a blank.
58 char last_separator =
' ';
61 while (!done && (index < to_parse.
length())) {
63 bool got_digit =
false;
// consume characters until a ',', '-' or ' ' is met.
// NOTE(review): the three to_parse[index] tests are evaluated before the
// `index < to_parse.length()` bound at the end of the condition; consider testing
// the bound first.
64 while ( (to_parse[index] !=
',') && (to_parse[index] !=
'-')
65 && (to_parse[index] !=
' ') && (index < to_parse.
length()) ) {
// any non-digit inside an id is a parse failure.
66 if (!isdigit(to_parse[index]))
return false;
// 0x30 is ASCII '0', so this adds the digit's numeric value to tmp_id and advances index.
68 tmp_id += int(to_parse[index++]) - 0x30;
77 identifiers += last_id;
// a '-' before this id selects the range handling below.
80 if (last_separator ==
'-') {
// when tmp_id >= last_id the first loop runs i from last_id + 1 through tmp_id; the
// second loop runs i from tmp_id up to, but not including, last_id. The loop bodies
// are not visible in this view.
81 if (tmp_id >= last_id) {
82 for (
int i = last_id + 1; i <= tmp_id; i++)
86 for (
int i = tmp_id; i < last_id; i++)
93 identifiers += last_id;
// anything other than ' ', '-' or ',' at this position is a parse failure.
98 if ( (to_parse[index] !=
' ') && (to_parse[index] !=
'-')
99 && (to_parse[index] !=
',') )
return false;
// remember the separator and step past it.
100 last_separator = to_parse[index++];
// Walks every index of ids; `separator` is appended to to_return only while i is not
// the last index, so it is not appended after the final entry.
// NOTE(review): the enclosing function header and the statement that emits each id are
// not visible in this view.
110 for (
int i = 0; i < ids.
length(); i++) {
112 if (i < ids.
length() - 1) {
113 to_return += separator;
// Same shape as the loop at listing line 110: walks every index of ids and appends
// `separator` to to_return for all but the last index.
// NOTE(review): the enclosing function header and the statement that emits each id are
// not visible in this view.
124 for (
int i = 0; i < ids.
length(); i++) {
126 if (i < ids.
length() - 1) {
127 to_return += separator;
// Scans to_emit one character at a time. A '"' or '\\' character triggers handling on a
// line not visible here (presumably an escape character is emitted first -- confirm);
// the character itself is appended to to_return via astring(next_char, 1).
// NOTE(review): the enclosing function header is not visible; presumably this is the
// body of emit_quoted_chunk -- confirm.
139 for (
int i = 0; i < to_emit.
length(); i++) {
140 char next_char = to_emit[i];
141 if ( (next_char ==
'"') || (next_char ==
'\\') )
143 to_return +=
astring(next_char, 1);
// For each of the to_csv.symbols() entries, appends one double-quoted item to target:
// a '"', emit_quoted_chunk(name(i)), "=", emit_quoted_chunk(to_csv[i]) and a closing '"'.
// A "," follows every item except the last.
// NOTE(review): the enclosing function header is not visible in this view.
151 for (
int i = 0; i < to_csv.
symbols(); i++) {
152 target +=
astring(
"\"") + emit_quoted_chunk(to_csv.
name(i))
153 +
"=" + emit_quoted_chunk(to_csv[i]) +
"\"";
154 if (i < to_csv.
symbols() - 1) target +=
",";
// For each of the to_csv.length() entries, appends emit_quoted_chunk(to_csv[i]) wrapped
// in double quotes to target. A "," follows every item except the last.
// NOTE(review): the enclosing function header is not visible in this view.
161 for (
int i = 0; i < to_csv.
length(); i++) {
162 target +=
astring(
"\"") + emit_quoted_chunk(to_csv[i]) +
"\"";
163 if (i < to_csv.
length() - 1) target +=
",";
// handle_escapes: when to_parse[i] is a backslash and the next character is '"' or
// another backslash, a character is appended to accumulator and `continue` skips the
// rest of the enclosing loop body. Expects to_parse, i and accumulator to be in scope.
// NOTE(review): the line between the test and the append is not visible (presumably it
// advances i -- confirm), and to_parse[i + 1] is read with no visible bounds check.
172 #define handle_escapes \
173 if (to_parse[i] == '\\') { \
174 if ( (to_parse[i + 1] == '"') || (to_parse[i + 1] == '\\') ) { \
176 accumulator += to_parse[i]; \
177 continue; /* skip normal handling in sequel. */ \
181 const int ARRAY_PREFILL_AMOUNT = 7;
182 // an arbitrary default: the count passed to fields.insert() each time ADD_LINE_TO_FIELDS grows the array.
// ADD_LINE_TO_FIELDS(new_line): increments storage_slot, then stores new_line at
// fields[storage_slot]. Before storing, if fields.length() < storage_slot + 2 it calls
// fields.insert(fields.length(), ARRAY_PREFILL_AMOUNT) (presumably appending that many
// empty slots -- confirm against string_array). Expects `fields` and `storage_slot` to
// be in scope where the macro is expanded.
184 #define ADD_LINE_TO_FIELDS(new_line) { \
185 storage_slot++; /* move to next place to store item. */ \
186 /* make sure we have enough space for the next slot and then some. */ \
187 /*LOG(a_sprintf("fields curr=%d stowslot=%d", fields.length(), storage_slot));*/ \
188 if (fields.length() < storage_slot + 2) \
189 fields.insert(fields.length(), ARRAY_PREFILL_AMOUNT); \
190 /*LOG(a_sprintf("now fields=%d stowslot=%d", fields.length(), storage_slot));*/ \
191 fields[storage_slot] = new_line; \
194 //hmmm: parameterize what is meant by a quote. maybe comma too.
// Splits one CSV line held in to_parse into `fields`, using a three-state machine
// (seeking_quote, eating_string, seeking_comma) that looks at one character per loop
// iteration. Fields may be double-quoted or unquoted; a comma met while seeking a quote
// stores an empty field.
// NOTE(review): the switch/case labels, several branches, the closing braces and the
// return statements are not visible in this view.
196 bool list_parsing::parse_csv_line(const astring &to_parse, string_array &fields)
198 FUNCDEF("parse_csv_line");
199 // the current field we're chowing. we puff it out to start with to
200 // avoid paying for expanding its memory later.
201 astring accumulator(' ', 256);
202 accumulator = astring::empty_string();
204 // the state machine goes through these states until the entire string
206 enum states { seeking_quote, eating_string, seeking_comma };
207 states state = seeking_quote;
209 bool no_second_quote = false; // true if we started without a quote.
210 bool just_saw_comma = false; // true if seeking comma was the last state.
// index of the most recently stored field; -1 because ADD_LINE_TO_FIELDS increments
// it before storing.
212 int storage_slot = -1;
214 for (int i = 0; i < to_parse.length(); i++) {
// white space before a field is skipped.
217 if (parser_bits::white_space(to_parse[i])) continue;
218 if (to_parse[i] == ',') {
219 // a missing quoted string counts as an empty string.
220 ADD_LINE_TO_FIELDS(astring::empty_string());
221 just_saw_comma = true;
224 just_saw_comma = false; // cancel that state.
// a non-quote character starts an unquoted field: keep the character and note that
// no closing quote is expected.
225 if (to_parse[i] != '"') {
227 accumulator += to_parse[i];
228 no_second_quote =
true;
230 state = eating_string;
233 just_saw_comma =
false;
// an unquoted field accumulates every character that is not a comma; a quoted field
// accumulates every character that is not a '"'.
234 if (no_second_quote && (to_parse[i] !=
',') ) {
236 accumulator += to_parse[i];
237 }
else if (!no_second_quote && (to_parse[i] !=
'"') ) {
239 accumulator += to_parse[i];
// an unquoted field goes straight back to seeking_quote with just_saw_comma set;
// a quoted field moves on to seeking_comma.
242 if (no_second_quote) {
243 state = seeking_quote;
244 just_saw_comma =
true;
245 }
else state = seeking_comma;
248 no_second_quote =
false;
// white space while looking for the comma is skipped.
252 if (parser_bits::white_space(to_parse[i]))
continue;
253 if (to_parse[i] ==
',') {
255 state = seeking_quote;
256 just_saw_comma =
true;
263 LOG(
"erroneous state reached during csv parsing");
// after the loop: a non-empty accumulator left in eating_string, or a trailing comma,
// each get handled by statements not visible in this view.
268 if ( (state == eating_string) && (accumulator.length()) )
270 else if (just_saw_comma)
// drop the slots from storage_slot + 1 through fields.last() when the array is longer
// than the stored fields (presumably the unused pre-filled entries -- confirm zap()).
272 if (fields.length() > storage_slot + 1)
273 fields.zap(storage_slot + 1, fields.last());
a_sprintf is a specialization of astring that provides printf style support.
void reset(int number=0, const contents *initial_contents=NULL_POINTER)
Resizes this array and sets the contents from an array of contents.
int length() const
Returns the current reported length of the allocated C array.
Provides a dynamically resizable ASCII character string.
static const astring & empty_string()
useful wherever empty strings are needed, e.g., function defaults.
int length() const
Returns the current length of the string.
A simple object that wraps a templated array of ints.
A simple object that wraps a templated set of ints.
bool add(const contents &to_add)
Adds a new element "to_add" to the set.
void clear()
Empties out this set.
An array of strings with some additional helpful methods.
Provides a symbol_table that holds strings as the content.
const basis::astring & name(int index) const
returns the name held at the "index".
int symbols() const
returns the number of symbols listed in the table.
#define MAXINT32
Maximum 32-bit integer value.
#define ADD_LINE_TO_FIELDS(new_line)
The guards collection helps in testing preconditions and reporting errors.
A dynamic container class that holds any kind of object via pointers.