feisty meow concerns codebase  2.140
parser_bits.cpp
Go to the documentation of this file.
1 /*****************************************************************************\
2 * *
3 * Name : parser_bits *
4 * Author : Chris Koeritz *
5 * *
6 *******************************************************************************
7 * Copyright (c) 2000-$now By Author. This program is free software; you can *
8 * redistribute it and/or modify it under the terms of the GNU General Public *
9 * License as published by the Free Software Foundation; either version 2 of *
10 * the License or (at your option) any later version. This is online at: *
11 * http://www.fsf.org/copyleft/gpl.html *
12 * Please send any updates to: fred@gruntose.com *
13 \*****************************************************************************/
14 
15 #include "parser_bits.h"
16 
17 #include <basis/astring.h>
18 #include <basis/environment.h>
19 #include <basis/functions.h>
20 
21 #include <ctype.h>
22 #include <stdio.h>
23 
24 using namespace basis;
25 
// bare-bones logging helper for this file: wraps its argument in a
// basis::astring and hands the resulting text to printf, followed by a newline.
#undef LOG
#define LOG(to_print) printf("%s\n", basis::astring(to_print).s())
28 
29 namespace textual {
30 
31 parser_bits::line_ending parser_bits::platform_eol()
32 {
33 #ifdef __UNIX__
34  // obviously a unix OS, unless someone's playing games with us.
35  return LF_AT_END;
36 #elif defined(__WIN32__)
37  // smells like DOS.
38  return CRLF_AT_END;
39 #else
40  // pick the unix default if we can't tell.
41  return LF_AT_END;
42 #endif
43 }
44 
45 const char *parser_bits::eol_to_chars(line_ending end)
46 {
47  static const char *CRLF_AT_END_STRING = "\r\n";
48  static const char *LF_AT_END_STRING = "\n";
49  static const char *NO_ENDING_STRING = "";
50 
51  switch (end) {
52  case CRLF_AT_END: return CRLF_AT_END_STRING;
53  case NO_ENDING: return NO_ENDING_STRING;
54  case LF_AT_END: // fall-through to default.
55  default: return LF_AT_END_STRING;
56  }
57 }
58 
59 const char *parser_bits::platform_eol_to_chars()
60 { return eol_to_chars(platform_eol()); }
61 
62 bool parser_bits::is_printable_ascii(char to_check)
63 { return (to_check >= 32) && (to_check <= 126); }
64 
65 bool parser_bits::white_space_no_cr(char to_check)
66 { return (to_check == ' ') || (to_check == '\t'); }
67 
68 bool parser_bits::is_eol(char to_check)
69 { return (to_check == '\n') || (to_check == '\r'); }
70 
71 bool parser_bits::white_space(char to_check)
72 { return white_space_no_cr(to_check) || is_eol(to_check); }
73 
74 void parser_bits::translate_CR_for_platform(astring &to_translate)
75 {
76  line_ending plat_eol = platform_eol();
77  bool last_was_lf = false;
78  for (int i = 0; i <= to_translate.end(); i++) {
79  if (to_translate[i] == '\r') {
80  if (last_was_lf) continue; // ignore two in a row.
81  last_was_lf = true;
82  } else if (to_translate[i] == '\n') {
83  if (last_was_lf) {
84  if (plat_eol != CRLF_AT_END) {
85  // fix it, since there was not supposed to be an LF.
86  to_translate.zap(i - 1, i - 1);
87  i--;
88  }
89  } else {
90  if (plat_eol == CRLF_AT_END) {
91  // fix it, since we're missing an LF that we want.
92  to_translate.insert(i, "\r");
93  i++;
94  }
95  }
96  last_was_lf = false;
97  } else {
98  // not the two power characters.
99  last_was_lf = false;
100  }
101  }
102 }
103 
104 bool parser_bits::is_hexadecimal(char look_at)
105 {
106  return range_check(look_at, 'a', 'f')
107  || range_check(look_at, 'A', 'F')
108  || range_check(look_at, '0', '9');
109 }
110 
111 bool parser_bits::is_hexadecimal(const char *look_at, int len)
112 {
113  for (int i = 0; i < len; i++)
114  if (!is_hexadecimal(look_at[i])) return false;
115  return true;
116 }
117 
118 bool parser_bits::is_hexadecimal(const astring &look_at, int len)
119 { return is_hexadecimal(look_at.observe(), len); }
120 
121 bool parser_bits::is_alphanumeric(char look_at)
122 {
123  return range_check(look_at, 'a', 'z')
124  || range_check(look_at, 'A', 'Z')
125  || range_check(look_at, '0', '9');
126 }
127 
128 bool parser_bits::is_alphanumeric(const char *look_at, int len)
129 {
130  for (int i = 0; i < len; i++)
131  if (!is_alphanumeric(look_at[i])) return false;
132  return true;
133 }
134 
135 bool parser_bits::is_alphanumeric(const astring &look_at, int len)
136 { return is_alphanumeric(look_at.observe(), len); }
137 
138 bool parser_bits::is_alpha(char look_at)
139 { return range_check(look_at, 'a', 'z') || range_check(look_at, 'A', 'Z'); }
140 
141 bool parser_bits::is_alpha(const char *look_at, int len)
142 {
143  for (int i = 0; i < len; i++)
144  if (!is_alpha(look_at[i])) return false;
145  return true;
146 }
147 
148 bool parser_bits::is_alpha(const astring &look_at, int len)
149 { return is_alpha(look_at.observe(), len); }
150 
151 bool parser_bits::is_identifier(char look_at)
152 {
153  return range_check(look_at, 'a', 'z')
154  || range_check(look_at, 'A', 'Z')
155  || range_check(look_at, '0', '9')
156  || (look_at == '_');
157 }
158 
159 bool parser_bits::is_identifier(const char *look_at, int len)
160 {
161  if (is_numeric(look_at[0])) return false;
162  for (int i = 0; i < len; i++)
163  if (!is_identifier(look_at[i])) return false;
164  return true;
165 }
166 
167 bool parser_bits::is_identifier(const astring &look_at, int len)
168 { return is_identifier(look_at.observe(), len); }
169 
170 bool parser_bits::is_numeric(char look_at)
171 {
172  return range_check(look_at, '0', '9') || (look_at == '-');
173 }
174 
175 bool parser_bits::is_numeric(const char *look_at, int len)
176 {
177  for (int i = 0; i < len; i++) {
178  if (!is_numeric(look_at[i])) return false;
179  if ( (i > 0) && (look_at[i] == '-') ) return false;
180  }
181  return true;
182 }
183 
184 bool parser_bits::is_numeric(const astring &look_at, int len)
185 { return is_numeric(look_at.observe(), len); }
186 
187 astring parser_bits::substitute_env_vars(const astring &to_process,
188  bool leave_unknown)
189 {
190  astring editing = to_process;
191 
192 //LOG(astring("input to subst env: ") + to_process);
193 
194  int indy; // index of the dollar sign in the string.
195  while (true) {
196  indy = editing.find('$');
197  if (negative(indy)) break; // all done.
198  int q;
199  for (q = indy + 1; q < editing.length(); q++) {
200  if (!parser_bits::is_identifier(editing[q]))
201  break; // done getting variable name.
202  }
203  if (q != indy + 1) {
204  // we caught something in our environment variable trap...
205  astring var_name = editing.substring(indy + 1, q - 1);
206 //LOG(astring("var name ") + var_name);
207  astring value_found = environment::get(var_name);
208 //LOG(astring("val found ") + value_found);
209  if (value_found.t()) {
210  editing.zap(indy, q - 1);
211  editing.insert(indy, value_found);
212  } else {
213  if (leave_unknown) {
214  // that lookup failed. let's mark it.
215  editing[indy] = '?';
216  // simple replacement, shows variables that failed.
217  } else {
218  // replace it with blankness.
219  editing.zap(indy, q - 1);
220  }
221  }
222  } else {
223  // well, we didn't see a valid variable name, but we don't want to leave
224  // the dollar sign in there.
225  editing[indy] = '!'; // simple replacement, marks where syntax is bad.
226  }
227 
228  }
229 
230 //LOG(astring("output from subst env: ") + editing);
231 
232  return editing;
233 }
234 
235 } //namespace.
236 
Provides a dynamically resizable ASCII character string.
Definition: astring.h:35
bool t() const
t() is a shortcut for the string being "true", as in non-empty.
Definition: astring.h:97
virtual void zap(int start, int end)
Deletes the characters between "start" and "end" inclusively.
Definition: astring.cpp:521
bool substring(astring &target, int start, int end) const
a version that stores the substring in an existing "target" string.
Definition: astring.cpp:865
void insert(int position, const astring &to_insert)
Copies "to_insert" into "this" at the "position".
Definition: astring.cpp:892
int end() const
returns the index of the last (non-null) character in the string.
Definition: astring.h:86
int length() const
Returns the current length of the string.
Definition: astring.cpp:132
int find(char to_find, int position=0, bool reverse=false) const
Locates "to_find" in "this".
Definition: astring.cpp:574
virtual const char * observe() const
observes the underlying pointer to the zero-terminated string.
Definition: astring.cpp:140
static astring get(const astring &variable_name)
looks up the "variable_name" in the current environment variables.
Definition: environment.cpp:57
static bool is_hexadecimal(const char *look_at, int len)
returns true if "look_at" is all hexadecimal characters.
line_ending
line_ending is an enumeration of the separator character(s) used to end lines in text files.
Definition: parser_bits.h:31
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
bool range_check(const type &c, const type &low, const type &high)
Returns true if "c" is between "low" and "high" inclusive.
Definition: functions.h:88
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition: functions.h:43
bool is_eol(char to_check)