feisty meow concerns codebase 2.140
parser_bits.cpp
Go to the documentation of this file.
1/*****************************************************************************\
2* *
3* Name : parser_bits *
4* Author : Chris Koeritz *
5* *
6*******************************************************************************
7* Copyright (c) 2000-$now By Author. This program is free software; you can *
8* redistribute it and/or modify it under the terms of the GNU General Public *
9* License as published by the Free Software Foundation; either version 2 of *
10* the License or (at your option) any later version. This is online at: *
11* http://www.fsf.org/copyleft/gpl.html *
12* Please send any updates to: fred@gruntose.com *
13\*****************************************************************************/
14
15#include "parser_bits.h"
16
17#include <basis/astring.h>
18#include <basis/environment.h>
19#include <basis/functions.h>
20
21#include <ctype.h>
22#include <stdio.h>
23
24using namespace basis;
25
26#undef LOG
27#define LOG(prf) printf("%s\n", basis::astring(prf).s())
28
29namespace textual {
30
32{
33#ifdef __UNIX__
34 // obviously a unix OS, unless someone's playing games with us.
35 return LF_AT_END;
36#elif defined(__WIN32__)
37 // smells like DOS.
38 return CRLF_AT_END;
39#else
40 // pick the unix default if we can't tell.
41 return LF_AT_END;
42#endif
43}
44
46{
47 static const char *CRLF_AT_END_STRING = "\r\n";
48 static const char *LF_AT_END_STRING = "\n";
49 static const char *NO_ENDING_STRING = "";
50
51 switch (end) {
52 case CRLF_AT_END: return CRLF_AT_END_STRING;
53 case NO_ENDING: return NO_ENDING_STRING;
54 case LF_AT_END: // fall-through to default.
55 default: return LF_AT_END_STRING;
56 }
57}
58
61
63{ return (to_check >= 32) && (to_check <= 126); }
64
66{ return (to_check == ' ') || (to_check == '\t'); }
67
68bool parser_bits::is_eol(char to_check)
69{ return (to_check == '\n') || (to_check == '\r'); }
70
71bool parser_bits::white_space(char to_check)
72{ return white_space_no_cr(to_check) || is_eol(to_check); }
73
75{
76 line_ending plat_eol = platform_eol();
77 bool last_was_lf = false;
78 for (int i = 0; i <= to_translate.end(); i++) {
79 if (to_translate[i] == '\r') {
80 if (last_was_lf) continue; // ignore two in a row.
81 last_was_lf = true;
82 } else if (to_translate[i] == '\n') {
83 if (last_was_lf) {
84 if (plat_eol != CRLF_AT_END) {
85 // fix it, since there was not supposed to be an LF.
86 to_translate.zap(i - 1, i - 1);
87 i--;
88 }
89 } else {
90 if (plat_eol == CRLF_AT_END) {
91 // fix it, since we're missing an LF that we want.
92 to_translate.insert(i, "\r");
93 i++;
94 }
95 }
96 last_was_lf = false;
97 } else {
98 // not the two power characters.
99 last_was_lf = false;
100 }
101 }
102}
103
105{
106 return range_check(look_at, 'a', 'f')
107 || range_check(look_at, 'A', 'F')
108 || range_check(look_at, '0', '9');
109}
110
111bool parser_bits::is_hexadecimal(const char *look_at, int len)
112{
113 for (int i = 0; i < len; i++)
114 if (!is_hexadecimal(look_at[i])) return false;
115 return true;
116}
117
118bool parser_bits::is_hexadecimal(const astring &look_at, int len)
119{ return is_hexadecimal(look_at.observe(), len); }
120
122{
123 return range_check(look_at, 'a', 'z')
124 || range_check(look_at, 'A', 'Z')
125 || range_check(look_at, '0', '9');
126}
127
128bool parser_bits::is_alphanumeric(const char *look_at, int len)
129{
130 for (int i = 0; i < len; i++)
131 if (!is_alphanumeric(look_at[i])) return false;
132 return true;
133}
134
135bool parser_bits::is_alphanumeric(const astring &look_at, int len)
136{ return is_alphanumeric(look_at.observe(), len); }
137
138bool parser_bits::is_alpha(char look_at)
139{ return range_check(look_at, 'a', 'z') || range_check(look_at, 'A', 'Z'); }
140
141bool parser_bits::is_alpha(const char *look_at, int len)
142{
143 for (int i = 0; i < len; i++)
144 if (!is_alpha(look_at[i])) return false;
145 return true;
146}
147
148bool parser_bits::is_alpha(const astring &look_at, int len)
149{ return is_alpha(look_at.observe(), len); }
150
152{
153 return range_check(look_at, 'a', 'z')
154 || range_check(look_at, 'A', 'Z')
155 || range_check(look_at, '0', '9')
156 || (look_at == '_');
157}
158
159bool parser_bits::is_identifier(const char *look_at, int len)
160{
161 if (is_numeric(look_at[0])) return false;
162 for (int i = 0; i < len; i++)
163 if (!is_identifier(look_at[i])) return false;
164 return true;
165}
166
167bool parser_bits::is_identifier(const astring &look_at, int len)
168{ return is_identifier(look_at.observe(), len); }
169
170bool parser_bits::is_numeric(char look_at)
171{
172 return range_check(look_at, '0', '9') || (look_at == '-');
173}
174
175bool parser_bits::is_numeric(const char *look_at, int len)
176{
177 for (int i = 0; i < len; i++) {
178 if (!is_numeric(look_at[i])) return false;
179 if ( (i > 0) && (look_at[i] == '-') ) return false;
180 }
181 return true;
182}
183
184bool parser_bits::is_numeric(const astring &look_at, int len)
185{ return is_numeric(look_at.observe(), len); }
186
188 bool leave_unknown)
189{
190 astring editing = to_process;
191
192//LOG(astring("input to subst env: ") + to_process);
193
194 int indy; // index of the dollar sign in the string.
195 while (true) {
196 indy = editing.find('$');
197 if (negative(indy)) break; // all done.
198 int q;
199 for (q = indy + 1; q < editing.length(); q++) {
200 if (!parser_bits::is_identifier(editing[q]))
201 break; // done getting variable name.
202 }
203 if (q != indy + 1) {
204 // we caught something in our environment variable trap...
205 astring var_name = editing.substring(indy + 1, q - 1);
206//LOG(astring("var name ") + var_name);
207 astring value_found = environment::get(var_name);
208//LOG(astring("val found ") + value_found);
209 if (value_found.t()) {
210 editing.zap(indy, q - 1);
211 editing.insert(indy, value_found);
212 } else {
213 if (leave_unknown) {
214 // that lookup failed. let's mark it.
215 editing[indy] = '?';
216 // simple replacement, shows variables that failed.
217 } else {
218 // replace it with blankness.
219 editing.zap(indy, q - 1);
220 }
221 }
222 } else {
223 // well, we didn't see a valid variable name, but we don't want to leave
224 // the dollar sign in there.
225 editing[indy] = '!'; // simple replacement, marks where syntax is bad.
226 }
227
228 }
229
230//LOG(astring("output from subst env: ") + editing);
231
232 return editing;
233}
234
235} //namespace.
236
Provides a dynamically resizable ASCII character string.
Definition astring.h:35
bool t() const
t() is a shortcut for the string being "true", as in non-empty.
Definition astring.h:97
virtual void zap(int start, int end)
Deletes the characters between "start" and "end" inclusively.
Definition astring.cpp:524
bool substring(astring &target, int start, int end) const
a version that stores the substring in an existing "target" string.
Definition astring.cpp:868
void insert(int position, const astring &to_insert)
Copies "to_insert" into "this" at the "position".
Definition astring.cpp:895
int end() const
returns the index of the last (non-null) character in the string.
Definition astring.h:86
int length() const
Returns the current length of the string.
Definition astring.cpp:132
int find(char to_find, int position=0, bool reverse=false) const
Locates "to_find" in "this".
Definition astring.cpp:577
virtual const char * observe() const
observes the underlying pointer to the zero-terminated string.
Definition astring.cpp:140
static astring get(const astring &variable_name)
looks up the "variable_name" in the current environment variables.
static void translate_CR_for_platform(basis::astring &to_translate)
flips embedded EOL characters for this platform's needs.
static bool is_identifier(char look_at)
returns true if "look_at" is a valid identifier character.
static bool is_hexadecimal(char look_at)
returns true if "look_at" is one of the hexadecimal characters.
static bool white_space_no_cr(char to_check)
reports if "to_check" is white space (a space or a tab) but not an end-of-line character (carriage return or line feed).
static bool white_space(char to_check)
returns true if the character "to_check" is considered a white space.
static bool is_alpha(char look_at)
returns true if "look_at" is one of the alphabetical characters.
static bool is_hexadecimal(const char *look_at, int len)
returns true if "look_at" is all hexadecimal characters.
static bool is_printable_ascii(char to_check)
returns true if "to_check" is a normally visible ASCII character.
line_ending
line_ending is an enumeration of the separator character(s) used to end lines in text files.
Definition parser_bits.h:31
@ LF_AT_END
Unix standard is LF_AT_END ("\n").
Definition parser_bits.h:32
@ CRLF_AT_END
DOS standard is CRLF_AT_END ("\r\n").
Definition parser_bits.h:33
@ NO_ENDING
No additional characters added as line endings.
Definition parser_bits.h:34
static basis::astring substitute_env_vars(const basis::astring &text, bool leave_unknown=true)
resolves embedded environment variables in "text".
static bool is_alphanumeric(char look_at)
returns true if "look_at" is one of the alphanumeric characters.
static line_ending platform_eol()
provides the appropriate ending on the current OS platform.
static bool is_numeric(char look_at)
returns true if "look_at" is a valid numerical character.
static const char * eol_to_chars(line_ending ending)
returns the C string form for the "ending" value.
static bool is_eol(char to_check)
returns true if "to_check" is part of an end-of-line sequence.
static const char * platform_eol_to_chars()
provides the characters that make up this platform's line ending.
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
bool range_check(const type &c, const type &low, const type &high)
Returns true if "c" is between "low" and "high" inclusive.
Definition functions.h:88
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition functions.h:43