1 #ifndef TOKENIZER_CLASS
2 #define TOKENIZER_CLASS
5 * Name : variable_tokenizer
6 * Author : Chris Koeritz
8 * Copyright (c) 1997-$now By Author. This program is free software; you can *
9 * redistribute it and/or modify it under the terms of the GNU General Public *
10 * License as published by the Free Software Foundation; either version 2 of *
11 * the License or (at your option) any later version. This is online at: *
12 * http://www.fsf.org/copyleft/gpl.html *
13 * Please send any updates to: fred@gruntose.com *
16 #include <basis/contracts.h>
17 #include <structures/string_table.h>
19 namespace configuration {
21 //! Manages a bank of textual definitions of variables.
23 Manipulates strings containing variable definitions where a variable
24 is syntactically defined as a name, an assignment operator, and a value.
25 The string can optionally define many variables by placing a separator
26 character between the definitions. The assignment and separator are
27 referred to as sentinels in the following docs.
28 This class also supports quoted values if the appropriate constructor is used.
32 class variable_tokenizer : public virtual basis::root_object
35 enum constraints { DEFAULT_MAX_BITS = 7 };
37 variable_tokenizer(int max_bits = DEFAULT_MAX_BITS);
38 //!< creates a variable_tokenizer with the default characters.
39 /*!< this will not look for quote characters. the "max_bits" establishes
40 the hashing width for the internal table of strings; there will be
41 2 ^ "max_bits" of space in the table. the default assignment operator
42 is '=' and the default separator is ','. */
44 variable_tokenizer(const basis::astring &separator, const basis::astring &assignment,
45 int max_bits = DEFAULT_MAX_BITS);
46 //!< creates an empty list of tokens and uses the specified sentinel chars.
47 /*!< the character that is expected to be between name/value pairs is
48 "separator". the "assignment" character is expected to be between each
49 name and its value. note that if the "separator" or "assignment" are more
50 than one character long, these will be taken as a set of valid characters
51 that can be used for those purposes. */
53 variable_tokenizer(const basis::astring &separator, const basis::astring &assignment,
54 const basis::astring &quotes, bool nesting = true,
55 int max_bits = DEFAULT_MAX_BITS);
56 //!< similar to the constructor above, but supports quoting.
57 /*!< if the "quotes" list is not empty, then those characters will be
58 treated as quoting characters that must be matched in pairs. inside a
59 quote, separators are ignored. if "nesting" is not true, then only one
60 level of quotes will be considered; the occurrence of other types of
61 quotes will be ignored until the original type is completed. */
63 variable_tokenizer(const variable_tokenizer &to_copy);
64 //!< builds a variable_tokenizer that is identical to "to_copy".
66 virtual ~variable_tokenizer();
68 DEFINE_CLASS_NAME("variable_tokenizer");
70 void set_comment_chars(const basis::astring &comments);
71 //!< establishes a set of characters in "comments" as the comment items.
72 /*!< comments will be specially handled by being added to the string table
73 with the comment prefix. this allows them to be regenerated uniquely. */
76 variable_tokenizer &operator =(const variable_tokenizer &to_copy);
77 //!< makes this variable_tokenizer identical to "to_copy".
80 //!< returns the number of entries in the variable_tokenizer.
83 //!< clears all of the entries out.
85 const structures::string_table &table() const;
86 //!< provides a constant peek at the string_table holding the values.
87 structures::string_table &table();
88 //!< provides direct access to the string_table holding the values.
91 bool parse(const basis::astring &to_tokenize);
92 //!< parses the string using our established sentinel characters.
93 /*!< attempts to snag as many name/value pairs from "to_tokenize" as are
94 possible by using the current separator and assignment characters.
95 E.G.: if the separator is ';' and the assignment character
96 is '=', then one's string would look something like: @code
97 state_folder=/home/fred/state; GLOB=/usr/bin/glob.exe; .... @endcode
98 whitespace is ignored if it's found (1) after a separator and before
99 the next variable name, (2) after the variable name and before the
100 assignment character, (3) after the assignment character and before the
101 value. this unfortunately implies that white space cannot begin or end a value.
103 NOTE: unpredictable results will occur: if one's variables are
104 improperly formed, if assignment operators are missing or misplaced,
105 or if the separator character is used within the value.
106 NOTE: carriage returns are considered white-space and can exist in the
107 string as described above.
108 NOTE: parse is additive; if multiple calls to parse() occur, then the
109 symbol_table will be built from the most recent values found in the
110 parameters to parse(). if this is not desired, the symbol table's
111 reset() function can be used to empty out all variables. */
113 basis::astring find(const basis::astring &name) const;
114 //!< locates the value for a variable named "name" if it exists.
115 /*!< if "name" doesn't exist, then it returns an empty string. note that
116 an empty string might also indicate that the value is blank; exists() is the
117 way to tell if a field is really missing. also note that when a variable
118 name is followed by an assignment operator and an empty value (e.g.,
119 "avversione=" has no value), then a value of a single space character
120 will be stored. this ensures that the same format is used on the
121 output side, but it also means that if you access the table directly,
122 then you will get a space as the value. however, this function returns
123 an empty string for those entries to keep consistent with expectations. */
125 bool exists(const basis::astring &name) const;
126 //!< returns true if the "name" exists in the variable_tokenizer.
128 basis::astring text_form() const;
129 //!< creates a new token list as a string of text.
130 /*!< the first separator and assignment characters in each set are used
131 to generate it. note that the whitespace that existed in the original
132 parsed string might not be exactly the same in the generated string. */
133 void text_form(basis::astring &to_fill) const;
134 //!< like text_form() above, but stores into "to_fill".
136 // dictates whether the output will have spaces between the assignment
137 // character and the key name and value. default is to not add them.
138 bool add_spaces() const { return _add_spaces; }  // reports the current setting.
139 void add_spaces(bool add_them) { _add_spaces = add_them; }  // changes the setting to "add_them".
141 bool okay_for_variable_name(char to_check) const;
142 //!< true if "to_check" is a valid variable name character.
143 /*!< this includes any characters besides separators and assignments. */
145 const basis::astring &assignments() const;
146 //!< provides a peek at the assignments list.
147 const basis::astring &separators() const;
148 //!< provides a peek at the separators list.
149 const basis::astring &quotes() const;
150 //!< provides a peek at the quotes list.
152 bool assignment(char to_check) const;
153 //!< true if "to_check" is a valid assignment operator.
155 bool separator(char to_check) const;
156 //!< true if "to_check" is a valid separator.
158 bool comment_char(char to_check) const;
159 //!< true if "to_check" is a registered comment character.
161 bool is_eol_a_separator() const;
162 //!< reports whether any of the separators are an EOL character.
164 bool quote_mark(char to_check) const;
165 //!< true if "to_check" is a member of the quotes list.
168 structures::string_table *_implementation; //!< holds the parsed values.
169 basis::astring *_assignments; //!< separates name from value.
170 basis::astring *_separators; //!< separates name/value pairs from other pairs.
171 basis::astring *_quotes; //!< the characters that are used for quoting.
172 bool _nesting; //!< if true, we nest arbitrary levels of quotes.
173 basis::astring *_comments; //!< if non-empty, characters that begin comments.
174 int _comment_number; //!< automatically incremented for use in comment tags.
175 bool _add_spaces; //!< records whether we add spaces around the assignment.