1 /*****************************************************************************\
3 * Name : list_parsing *
4 * Author : Chris Koeritz *
5 * Author : Gary Hardley *
7 *******************************************************************************
8 * Copyright (c) 2002-$now By Author. This program is free software; you can *
9 * redistribute it and/or modify it under the terms of the GNU General Public *
10 * License as published by the Free Software Foundation; either version 2 of *
11 * the License or (at your option) any later version. This is online at: *
12 * http://www.fsf.org/copyleft/gpl.html *
13 * Please send any updates to: fred@gruntose.com *
14 \*****************************************************************************/
16 #include "list_parsing.h"
17 #include "parser_bits.h"
19 #include <basis/astring.h>
20 #include <structures/set.h>
21 #include <structures/string_table.h>
26 using namespace basis;
27 using namespace structures;
// diagnostic helper: prints "<class name>::<function name>: <message>" plus
// a newline via printf.  the expansion refers to an identifier called "func",
// so one must be in scope wherever LOG is used (parse_csv_line invokes
// FUNCDEF before logging; presumably that is what declares it -- confirm).
32 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
// out-of-line destructor with an empty body.
34 list_parsing::~list_parsing() {} // needed since we use the class_name macro.
// fills the set "identifiers" with the ids parsed out of "to_parse".  any
// previous contents of the set are discarded first.  the parsing itself is
// handed to another get_ids_from_string overload (presumably the int_array
// one -- confirm against the declaration of "found"); if that call fails,
// false is returned and the set stays empty.
37 bool list_parsing::get_ids_from_string(const astring &to_parse, int_set &identifiers)
39 identifiers.clear(); // clear existing ids, if any.
// let the other overload do the real work, collecting results in "found".
41 bool ret = get_ids_from_string(to_parse, found);
42 if (!ret) return false;
// copy every id that was found into the caller's set.
43 for (int i = 0; i < found.length(); i++) identifiers.add(found[i]);
// parses "to_parse" as a list of decimal ids and stores them in "identifiers".
// the characters ',', ' ' and '-' act as separators, and a dash between two
// numbers denotes a range.  "identifiers" is always reset first.  false is
// returned when "to_parse" tests false under operator ! (per the existing
// note, the empty string case), when a character is neither a digit nor one
// of the separators, or when the MAXINT32 check on a number trips.
48 bool list_parsing::get_ids_from_string(const astring &to_parse,
49 int_array &identifiers)
51 identifiers.reset(); // clear existing ids, if any.
52 if (!to_parse) return false;
53 // if an empty string is passed, return an empty set.
// the separator seen before the number currently being read; it starts out
// as a blank, so the first number is never treated as the end of a range.
58 char last_separator = ' ';
// keep scanning until "done" is set or the end of the string is reached.
61 while (!done && (index < to_parse.length())) {
// starts false on every pass of the outer loop.
63 bool got_digit = false;
// consume characters until one of the three separators shows up.
// NOTE(review): the "index < to_parse.length()" test is the last operand, so
// to_parse[index] has already been read three times before the bound is
// checked; whether that is harmless depends on how astring's operator []
// treats an out-of-range index -- confirm.
64 while ( (to_parse[index] != ',') && (to_parse[index] != '-')
65 && (to_parse[index] != ' ') && (index < to_parse.length()) ) {
// anything that is not a digit here makes the whole parse fail.
// NOTE(review): isdigit receives a plain char; a negative char value is
// outside isdigit's defined input range -- consider an unsigned char cast.
66 if (!isdigit(to_parse[index])) return false;
// add in the digit's numeric value (0x30 is the ascii code for '0') and
// step to the next character.
68 tmp_id += int(to_parse[index++]) - 0x30;
// NOTE(review): if tmp_id is declared as a 32 bit int, this comparison can
// never be true and an overly long number would overflow instead of being
// rejected -- confirm the type of tmp_id.
73 if (tmp_id > MAXINT32) return false;
// append last_id to the output array.
77 identifiers += last_id;
79 // if the last separator was a dash, this is a range
80 if (last_separator == '-') {
81 if (tmp_id >= last_id) {
// ascending range: iterate from just past last_id up to and including tmp_id.
82 for (int i = last_id + 1; i <= tmp_id; i++)
// otherwise iterate from tmp_id up to, but not including, last_id.
86 for (int i = tmp_id; i < last_id; i++)
// append last_id to the output array.
93 identifiers += last_id;
97 // did not read an address, to_parse[index] must be a non-digit.
// only a blank, a dash or a comma is acceptable at this position.
98 if ( (to_parse[index] != ' ') && (to_parse[index] != '-')
99 && (to_parse[index] != ',') ) return false;
// remember which separator this was and step past it.
100 last_separator = to_parse[index++];
// renders the ids held in the set "ids" as text.  each id is formatted with
// "%d" and appended to to_return, and "separator" is placed between
// consecutive ids but not after the final one.
107 astring list_parsing::put_ids_in_string(const int_set &ids, char separator)
110 for (int i = 0; i < ids.length(); i++) {
111 to_return += a_sprintf("%d", ids[i]);
// every id except the last one is followed by the separator.
112 if (i < ids.length() - 1) {
113 to_return += separator;
// renders the ids held in the array "ids" as text.  each id is formatted
// with "%d" and appended to to_return in array order, and "separator" is
// placed between consecutive ids but not after the final one.
121 astring list_parsing::put_ids_in_string(const int_array &ids, char separator)
124 for (int i = 0; i < ids.length(); i++) {
125 to_return += a_sprintf("%d", ids[i]);
// every id except the last one is followed by the separator.
126 if (i < ids.length() - 1) {
127 to_return += separator;
134 // ensures that quotes inside the string "to_emit" are escaped.
// the text of "to_emit" is copied one character at a time into to_return,
// and a backslash is inserted in front of every double quote and every
// backslash.  no other character is altered, and no surrounding quotes are
// added here.
135 astring list_parsing::emit_quoted_chunk(const astring &to_emit)
137 astring to_return('\0', 256); // avoid reallocations with large pre-alloc.
138 to_return = ""; // reset to get blank string but keep pre-alloc.
139 for (int i = 0; i < to_emit.length(); i++) {
140 char next_char = to_emit[i];
141 if ( (next_char == '"') || (next_char == '\\') )
142 to_return += "\\"; // add the escape before quote or backslash.
// the character itself is always appended, escaped or not.
143 to_return += astring(next_char, 1);
// writes the contents of the string table "to_csv" into "target" as one csv
// line.  "target" is emptied first.  each symbol becomes a double-quoted
// item of the form name=value, where the name and the value have both been
// passed through emit_quoted_chunk, and the items are joined by commas.
148 void list_parsing::create_csv_line(const string_table &to_csv, astring &target)
150 target = astring::empty_string();
151 for (int i = 0; i < to_csv.symbols(); i++) {
152 target += astring("\"") + emit_quoted_chunk(to_csv.name(i))
153 + "=" + emit_quoted_chunk(to_csv[i]) + "\"";
// a comma goes after every item except the last.
154 if (i < to_csv.symbols() - 1) target += ",";
// writes the strings in "to_csv" into "target" as one csv line.  "target"
// is emptied first.  each string is passed through emit_quoted_chunk and
// wrapped in double quotes, and the items are joined by commas.
158 void list_parsing::create_csv_line(const string_array &to_csv, astring &target)
160 target = astring::empty_string();
161 for (int i = 0; i < to_csv.length(); i++) {
162 target += astring("\"") + emit_quoted_chunk(to_csv[i]) + "\"";
// a comma goes after every item except the last.
163 if (i < to_csv.length() - 1) target += ",";
167 // we do handle escaped quotes for csv parsing, so check for backslash.
168 // and since we handle escaped quotes, we also have to handle escaping the
169 // backslash (otherwise a quoted item with a backslash as the last character
170 // cannot be handled appropriately, because it will be interpreted as an
171 // escaped quote instead). no other escapes are implemented right now.
// the macro is written to be expanded inside a loop: it uses "to_parse", "i"
// and "accumulator" from the surrounding scope and ends with a "continue".
// NOTE(review): to_parse[i + 1] is read without a visible length check; when
// the backslash is the last character this relies on how astring's
// operator [] treats an out-of-range index -- confirm.
172 #define handle_escapes \
173 if (to_parse[i] == '\\') { \
174 if ( (to_parse[i + 1] == '"') || (to_parse[i + 1] == '\\') ) { \
176 accumulator += to_parse[i]; \
177 continue; /* skip normal handling in sequel. */ \
// the count handed to fields.insert() by ADD_LINE_TO_FIELDS each time the
// fields array has to be grown.
181 const int ARRAY_PREFILL_AMOUNT = 7;
182 // a random default for pre-filling.
// stores "new_line" as the next parsed field.  the macro uses "fields" and
// "storage_slot" from the surrounding scope: it advances storage_slot, grows
// "fields" by ARRAY_PREFILL_AMOUNT when fewer than storage_slot + 2 entries
// exist, and then assigns new_line to fields[storage_slot].  since the array
// grows in batches it can end up longer than the number of fields stored.
184 #define ADD_LINE_TO_FIELDS(new_line) { \
185 storage_slot++; /* move to next place to store item. */ \
186 /* make sure we have enough space for the next slot and then some. */ \
187 /*LOG(a_sprintf("fields curr=%d stowslot=%d", fields.length(), storage_slot));*/ \
188 if (fields.length() < storage_slot + 2) \
189 fields.insert(fields.length(), ARRAY_PREFILL_AMOUNT); \
190 /*LOG(a_sprintf("now fields=%d stowslot=%d", fields.length(), storage_slot));*/ \
191 fields[storage_slot] = new_line; \
194 //hmmm: parameterize what is meant by a quote. maybe comma too.
// breaks the csv text in "to_parse" into individual strings stored in
// "fields".  a field may be wrapped in double quotes or left bare; a bare
// field runs up to the next comma and a quoted field runs up to its closing
// double quote.  a comma with no field before it, and a comma that ends the
// line, each contribute an empty string.  slots are added to "fields" in
// batches by ADD_LINE_TO_FIELDS, and any left unused are removed at the end.
// NOTE(review): confirm against the return statements exactly which inputs
// make this report failure; a missing comma after a quoted field is treated
// as an error.
196 bool list_parsing::parse_csv_line(const astring &to_parse, string_array &fields)
198 FUNCDEF("parse_csv_line");
199 // the current field we're chowing. we puff it out to start with to
200 // avoid paying for expanding its memory later.
201 astring accumulator(' ', 256);
202 accumulator = astring::empty_string();
204 // the state machine goes through these states until the entire string
// seeking_quote: looking for the start of the next field.
// eating_string: collecting the characters of the current field.
// seeking_comma: a quoted field has been closed; a comma must come next.
206 enum states { seeking_quote, eating_string, seeking_comma };
207 states state = seeking_quote;
209 bool no_second_quote = false; // true if we started without a quote.
210 bool just_saw_comma = false; // true if seeking comma was the last state.
// index of the most recently stored field; -1 means nothing stored yet.
212 int storage_slot = -1;
// examine the input one character at a time.
214 for (int i = 0; i < to_parse.length(); i++) {
// white space in front of a field is skipped.
217 if (parser_bits::white_space(to_parse[i])) continue;
218 if (to_parse[i] == ',') {
219 // a missing quoted string counts as an empty string.
220 ADD_LINE_TO_FIELDS(astring::empty_string());
221 just_saw_comma = true;
224 just_saw_comma = false; // cancel that state.
225 if (to_parse[i] != '"') {
226 // short circuit the need for a quote.
// the character is the first one of a bare (unquoted) field, so keep it.
227 accumulator += to_parse[i];
228 no_second_quote = true;
230 state = eating_string;
233 just_saw_comma = false; // no longer true.
// a bare field keeps accumulating until a comma is found.
234 if (no_second_quote && (to_parse[i] != ',') ) {
236 accumulator += to_parse[i];
// a quoted field keeps accumulating until a double quote is found.
237 } else if (!no_second_quote && (to_parse[i] != '"') ) {
239 accumulator += to_parse[i];
241 // we found the closing quote (or comma). add the string.
// a bare field has just consumed its comma, so the next field can start
// right away; a quoted field still needs its trailing comma.
242 if (no_second_quote) {
243 state = seeking_quote;
244 just_saw_comma = true;
245 } else state = seeking_comma;
246 ADD_LINE_TO_FIELDS(accumulator)
247 accumulator = astring::empty_string();
248 no_second_quote = false;
// white space between a closing quote and the comma is skipped.
252 if (parser_bits::white_space(to_parse[i])) continue;
253 if (to_parse[i] == ',') {
254 // we got what we wanted.
255 state = seeking_quote;
256 just_saw_comma = true;
259 // well, there was no comma. that's an error.
263 LOG("erroneous state reached during csv parsing");
// the input ran out while a field was still being collected; keep whatever
// was gathered, provided it is not empty.
268 if ( (state == eating_string) && (accumulator.length()) )
269 ADD_LINE_TO_FIELDS(accumulator)
// a comma at the very end of the line implies one more, empty, field.
270 else if (just_saw_comma)
271 ADD_LINE_TO_FIELDS(astring::empty_string())
// drop the pre-filled slots that never received a field.
272 if (fields.length() > storage_slot + 1)
273 fields.zap(storage_slot + 1, fields.last());