feisty meow concerns codebase 2.140
ini_parser.cpp
Go to the documentation of this file.
1/*****************************************************************************\
2* *
3* Name : ini_parser *
4* Author : Chris Koeritz *
5* *
6*******************************************************************************
7* Copyright (c) 2000-$now By Author. This program is free software; you can *
8* redistribute it and/or modify it under the terms of the GNU General Public *
9* License as published by the Free Software Foundation; either version 2 of *
10* the License or (at your option) any later version. This is online at: *
11* http://www.fsf.org/copyleft/gpl.html *
12* Please send any updates to: fred@gruntose.com *
13\*****************************************************************************/
14
15#include "ini_parser.h"
16#include "table_configurator.h"
17#include "variable_tokenizer.h"
18
19#include <basis/astring.h>
20#include <basis/functions.h>
21#include <structures/amorph.h>
24#include <textual/parser_bits.h>
25
26//#define DEBUG_INI_PARSER
27 // uncomment for noisy version.
28
29#undef LOG
30#ifdef DEBUG_INI_PARSER
31 #define LOG(to_print) printf("%s\n", astring(to_print).s())
32#else
33 #define LOG(a) {}
34#endif
35
37
38using namespace basis;
39using namespace structures;
40using namespace textual;
41//using namespace ;
42
43namespace configuration {
44
45//algorithm:
46// gather section until next section definition or end of file.
47// parse the section with variable_tokenizer.
48// eat that out of the string.
49// repeat.
50
52: table_configurator(behavior),
53 _well_formed(false),
54 _preface(new astring)
55{
56 reset(to_parse);
57}
58
60{
61 WHACK(_preface);
62}
63
64void ini_parser::chow_through_eol(astring &to_chow)
65{
66 while (to_chow.length()) {
67 if (parser_bits::is_eol(to_chow[0])) {
68 // zap all carriage return type chars now that we found one.
69 while (to_chow.length() && parser_bits::is_eol(to_chow[0])) {
70 *_preface += to_chow[0];
71 to_chow.zap(0, 0);
72 }
73 return; // mission accomplished.
74 }
75 *_preface += to_chow[0];
76 to_chow.zap(0, 0);
77 }
78}
79
80/*
81//this is a super expensive operation...
82// it would be better to have the parser be a bit more intelligent.
83void strip_blank_lines(astring &to_strip)
84{
85 bool last_was_ret = false;
86 for (int i = 0; i < to_strip.length(); i++) {
87 if (parser_bits::is_eol(to_strip[i])) {
88 if (last_was_ret) {
89 // two in a row; now that's bogus.
90 to_strip.zap(i, i);
91 i--; // skip back.
92 continue;
93 }
94 last_was_ret = true;
95 to_strip[i] = '\n'; // make sure we know which type to look for.
96 } else {
97 if (last_was_ret && parser_bits::white_space(to_strip[i])) {
98 // well, the last was a return but this is white space. that's also
99 // quite bogus.
100 to_strip.zap(i, i);
101 i--; // skip back.
102 continue;
103 }
104 last_was_ret = false;
105 }
106 }
107}
108*/
109
110void ini_parser::reset(const astring &to_parse)
111{
112 _well_formed = false;
113 table_configurator::reset(); // clean out existing contents.
114 _preface->reset(); // set the preface string back to nothing.
115 add(to_parse);
116}
117
118void ini_parser::add(const astring &to_parse)
119{
120 astring parsing = to_parse;
121// strip_blank_lines(parsing);
122 _preface->reset(); // set the preface string back to nothing.
123 while (parsing.length()) {
124 astring section_name;
125 bool found_sect = parse_section(parsing, section_name);
126 if (!found_sect) {
127 // the line is not a section name. toss it.
128 chow_through_eol(parsing);
129 continue; // try to find another section name.
130 }
131 // we got a section. yee hah.
132 int next_sect = 0;
133 for (next_sect = 0; next_sect < parsing.length(); next_sect++) {
134// LOG(astring("[") + astring(parsing[next_sect], 1) + "]");
135 if (parser_bits::is_eol(parsing[next_sect])) {
136 // we found the requisite return; let's see if a section beginning
137 // is just after it. we know nothing else should be, since we stripped
138 // out the blank lines and blanks after CRs.
139 if (parsing[next_sect + 1] == '[') {
140 // aha, found the bracket that should be a section start.
141 break; // done seeking next section beginning.
142 }
143 }
144 }
145 // skip back one if we hit the end of the string.
146 if (next_sect >= parsing.length()) next_sect--;
147 // now grab what should be all values within a section.
148 LOG(a_sprintf("bounds are %d to %d, string len is %d.", 0, next_sect,
149 parsing.length()));
150 astring sect_parsing = parsing.substring(0, next_sect);
151 LOG(astring("going to parse: >>") + sect_parsing + "<<");
152 parsing.zap(0, next_sect);
153 variable_tokenizer section_reader("\n", "=");
154 section_reader.set_comment_chars(";#");
155 section_reader.parse(sect_parsing);
156 LOG(astring("read: ") + section_reader.text_form());
157 merge_section(section_name, section_reader.table());
158 }
159 _well_formed = true;
160}
161
162void ini_parser::merge_section(const astring &section_name,
163 const string_table &to_merge)
164{
165 if (!section_exists(section_name)) {
166 // didn't exist yet, so just plunk it in.
167 put_section(section_name, to_merge);
168 return;
169 }
170
171 // since the section exists, we just write the individual entries from the
172 // new section. they'll stamp out any old values.
173 for (int i = 0; i < to_merge.symbols(); i++)
174 put(section_name, to_merge.name(i), to_merge[i]);
175}
176
177bool ini_parser::parse_section(astring &to_parse, astring &section_name)
178{
179 section_name = ""; // reset the section.
180
181 // we have a simple state machine here...
182 enum states {
183 SEEKING_OPENING_BRACKET, // looking for the first bracket.
184 EATING_SECTION_NAME // got a bracket, now getting section name.
185 };
186 states state = SEEKING_OPENING_BRACKET;
187
188 // zip through the string trying to find a valid section name.
189 for (int i = 0; i < to_parse.length(); i++) {
190 char curr = to_parse[i];
191 LOG(astring("<") + astring(curr, 1) + ">");
192 switch (state) {
193 case SEEKING_OPENING_BRACKET:
194 // we're looking for the first bracket now...
195 if (parser_bits::white_space(curr)) continue; // ignore white space.
196 if (curr != '[') return false; // argh, bad characters before bracket.
197 state = EATING_SECTION_NAME; // found the bracket.
198 break;
199 case EATING_SECTION_NAME:
200 // we're adding to the section name now...
201 if (curr == ']') {
202 // that's the end of the section name.
203 to_parse.zap(0, i); // remove what we saw.
204//should we take out to end of line also?
205//eventually up to eol could be kept as a comment?
206 return true;
207 }
208 section_name += curr; // add a character to the name.
209 break;
210 default:
211 //LOG("got to unknown case in section parser!");
212 return false;
213 }
214 }
215 // if we got to here, the section was badly formed... the whole string was
216 // parsed through but no conclusion was reached.
217 return false;
218}
219
220bool ini_parser::restate(astring &new_ini, bool add_spaces)
221{
222 new_ini = *_preface; // give it the initial text back again.
223 string_array sects;
224 sections(sects);
225 for (int i = 0; i < sects.length(); i++) {
226 new_ini += astring("[") + sects[i] + "]" + parser_bits::platform_eol_to_chars();
227 string_table tab;
228 if (!get_section(sects[i], tab)) continue; // serious error.
229 tab.add_spaces(add_spaces);
230 new_ini += tab.text_form();
231 }
232 return true;
233}
234
235} //namespace.
236
237
#define LOG(s)
a_sprintf is a specialization of astring that provides printf style support.
Definition astring.h:440
int length() const
Returns the current reported length of the allocated C array.
Definition array.h:115
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
virtual void zap(int start, int end)
Deletes the characters between "start" and "end" inclusively.
Definition astring.cpp:524
bool substring(astring &target, int start, int end) const
a version that stores the substring in an existing "target" string.
Definition astring.cpp:868
void reset()
clears out the contents string.
Definition astring.h:202
int length() const
Returns the current length of the string.
Definition astring.cpp:132
void add(const basis::astring &to_parse)
merges items parsed from "to_parse" into the current set.
ini_parser(const basis::astring &to_parse, treatment_of_defaults behavior=RETURN_ONLY)
constructs an ini_parser by parsing entries out of "to_parse".
void merge_section(const basis::astring &section_name, const structures::string_table &to_merge)
merges the table "to_merge" into the "section_name".
bool restate(basis::astring &new_ini, bool add_spaces=false)
stores a cleaned version of the internal state into "new_ini".
Supports the configurator interface using a collection of string tables.
virtual bool put(const basis::astring &section, const basis::astring &entry, const basis::astring &to_store)
implements the configurator storage function.
virtual void sections(structures::string_array &list)
retrieves the section names into "list".
virtual bool section_exists(const basis::astring &section)
true if the "section" is presently in the table config.
virtual bool put_section(const basis::astring &section, const structures::string_table &info)
writes a table called "info" into the "section" held here.
virtual bool get_section(const basis::astring &section, structures::string_table &info)
reads the entire table held under "section" into a table called "info".
Manages a bank of textual definitions of variables.
const structures::string_table & table() const
provides a constant peek at the string_table holding the values.
basis::astring text_form() const
creates a new token list as a string of text.
void set_comment_chars(const basis::astring &comments)
establishes a set of characters in "comments" as the comment items.
bool parse(const basis::astring &to_tokenize)
parses the string using our established sentinel characters.
An array of strings with some additional helpful methods.
Provides a symbol_table that holds strings as the content.
basis::astring text_form() const
prints the contents of the table into the returned string.
const basis::astring & name(int index) const
returns the name held at the "index".
int symbols() const
returns the number of symbols listed in the table.
static bool white_space(char to_check)
returns true if the character "to_check" is considered a white space.
static bool is_eol(char to_check)
returns true if "to_check" is part of an end-of-line sequence.
static const char * platform_eol_to_chars()
provides the characters that make up this platform's line ending.
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
void WHACK(contents *&ptr)
deletion with clearing of the pointer.
Definition functions.h:121
A dynamic container class that holds any kind of object via pointers.
Definition amorph.h:55