4 /*****************************************************************************\
7 * Author : Chris Koeritz *
9 *******************************************************************************
10 * Copyright (c) 1992-$now By Author. This program is free software; you can *
11 * redistribute it and/or modify it under the terms of the GNU General Public *
12 * License as published by the Free Software Foundation; either version 2 of *
13 * the License or (at your option) any later version. This is online at: *
14 * http://www.fsf.org/copyleft/gpl.html *
15 * Please send any updates to: fred@gruntose.com *
16 \*****************************************************************************/
18 #include "base_string.h"
19 #include "byte_array.h"
20 #include "contracts.h"
26 //! Provides a dynamically resizable ASCII character string.
28 It mimics the standard (char *) type, but provides a slew of helpful
29 methods as well as enforcing bounds checking on the underlying array.
33 : public virtual base_string,
34 public virtual hoople_standard
38 //!< constructs an empty string.
40 astring(const char *initial);
41 //!< constructs a copy of the string passed in "initial".
43 astring(char c, int repeat);
44 //!< constructs a string with "repeat" characters of "c" in it.
45 /*!< if "c" is the null character (i.e., equal to zero), then the resulting
46 string will have "repeat" space characters in it. */
48 astring(const astring &s);
49 //!< Constructs a copy of the string "s".
51 astring(const base_string &initial);
52 //!< constructs a string from the base class.
54 enum special_flag { UNTERMINATED = 62, SPRINTF = 84 };
55 astring(special_flag way, const char *s, ...);
56 //!< constructor that sports a few variable parameter constructions.
58 For a flag of "UNTERMINATED", the constructor expects the third
59 parameter to be an integer, and then it copies that number of
60 characters from the C-string "s" without assuming that "s" is zero
63 For a flag of "SPRINTF", a string is constructed using the format specifier
64 in "s" in a manner similar to the standard library "sprintf" function
65 (see the standard library for <stdio.h>). If there are no "%" codes in
66 "s", then the constructor just copies "s" without modification. If "%"
67 codes are in the character array, then any additional arguments (...) are
68 interpreted as they would be by sprintf. The length of the
69 constructed string is tailored to fit the actual contents. If "s" is
70 null, then the resulting string will be empty. Currently, the "*"
71 specifier for variable length fields is not supported. */
74 //!< destroys any storage for the string.
76 DEFINE_CLASS_NAME("astring");
78 virtual int comparator(const astring &s2) const;
79 //!< helps to fulfill orderable contract.
82 //!< Returns the current length of the string.
83 /*!< The length returned does not include the terminating null character
84 at the end of the string. */
86 int end() const { const int char_count = length(); return char_count - 1; }
87 //!< returns the index of the last (non-null) character in the string.
88 /*!< If there is no content in the string, then a negative value is
91 bool empty() const { return length() == 0; }
92 //!< empty() reports if the string is empty, that is, of zero length().
93 bool non_empty() const { const bool has_nothing = empty(); return !has_nothing; }
94 //!< non_empty() reports if the string has some contents.
95 bool operator ! () const { const bool is_blank = empty(); return is_blank; }
96 //!< the negation operator returns true if the string is empty.
97 /*!< it can be used in expressions in a readable way, for example:
98 if (!my_string) { it_is_empty; } */
99 bool t() const { if (empty()) return false; return true; }
100 //!< t() is a shortcut for the string being "true", as in non-empty.
101 /*!< the logic here is that the string is not false because it's not
102 empty. for example: if (my_string.t()) { it_is_not_empty; } */
104 static const astring &empty_string();
105 //!< useful wherever empty strings are needed, e.g., function defaults.
106 /*!< note that this is implemented in the opsystem library to avoid bad
107 issues with static objects mixed into multiple dlls from a static
110 virtual const char *observe() const;
111 //!< observes the underlying pointer to the zero-terminated string.
112 /*!< this does not allow the contents to be modified. this method should
113 never return NULL_POINTER. */
114 const char *c_str() const { const char *held_text = observe(); return held_text; }
115 //!< synonym for observe. mimics the STL method name.
116 const char *s() const { const char *held_text = observe(); return held_text; }
117 //!< synonym for observe. the 's' stands for "string", if that helps.
119 virtual char get(int index) const;
120 //!< a constant peek at the string's internals at the specified index.
122 virtual char *access();
123 //!< provides access to the actual string held.
124 /*!< this should never return NULL_POINTER. be very careful with the returned
125 pointer: don't destroy or corrupt its contents (e.g., do not mess with
126 its zero termination). */
127 char *c_str() { char *writable_text = access(); return writable_text; }
128 //!< synonym for access. mimics the STL method.
129 char *s() { char *writable_text = access(); return writable_text; }
130 //!< synonym for access.
132 char &operator [] (int position);
133 //!< accesses individual characters in "this" string.
134 /*!< if the "position" is out of range, the return value is
136 const char &operator [] (int position) const;
137 //!< observes individual characters in "this" string.
138 /*!< if the "position" is out of range, the return value is
141 virtual void put(int position, char to_put) { char &slot = operator [] (position); slot = to_put; }
142 //!< stores the character "to_put" at index "position" in the string.
144 astring &sprintf(const char *s, ...);
145 //!< similar to the SPRINTF constructor, but works on an existing string.
146 /*!< any existing contents in the string are wiped out. */
148 int convert(int default_value) const;
149 //!< Converts the string into a corresponding integer.
150 /*!< The conversion starts at index 0 in "this" string. If a valid
151 integer was found, it is returned. Otherwise, the
152 "default_value" is returned. NOTE: be careful of implicit conversions
153 here; the "default_value" for any of these functions must either be an
154 object of the exact type needed or must be cast to that type. */
155 long convert(long default_value) const;
156 //!< converts the string to a long integer.
157 float convert(float default_value) const;
158 //!< converts the string to a floating point number.
159 double convert(double default_value) const;
160 //!< converts the string to a double precision floating point number.
162 bool equal_to(const char *that) const;
163 //!< returns true if "that" is equal to this.
165 bool iequals(const astring &that) const;
166 //!< returns true if this is case-insensitively equal to "that".
167 bool iequals(const char *that) const;
168 //!< returns true if this is case-insensitively equal to "that".
170 bool compare(const astring &to_compare, int start_first,
171 int start_second, int count, bool case_sensitive) const;
172 //!< Compares "this" string with "to_compare".
173 /*!< The "start_first" is where the comparison begins in "this" string,
174 and "start_second" where it begins in the "to_compare". The "count" is
175 the number of characters to compare between the two strings. If either
176 index is out of range, or "count"-1 + either index is out of range, then
177 compare returns false. If the strings differ in that range, false is
178 returned. Only if the strings have identical contents in the range is
181 bool begins(const astring &maybe_prefix) const
182 { const int prefix_len = maybe_prefix.length(); return compare(maybe_prefix, 0, 0, prefix_len, true); }
183 //!< Returns true if "this" string begins with "maybe_prefix".
185 bool ibegins(const astring &maybe_prefix) const
186 { const int prefix_len = maybe_prefix.length(); return compare(maybe_prefix, 0, 0, prefix_len, false); }
187 //!< a case-insensitive method similar to begins().
189 //! returns true if this string ends with "maybe_suffix".
190 bool ends(const astring &maybe_suffix) const {
191 const int diff = length() - maybe_suffix.length();
192 return (diff >= 0) && compare(maybe_suffix, diff, 0, maybe_suffix.length(), true);
194 //! a case-insensitive method similar to ends().
195 bool iends(const astring &maybe_suffix) const {
196 const int diff = length() - maybe_suffix.length();
197 return (diff >= 0) && compare(maybe_suffix, diff, 0, maybe_suffix.length(), false);
200 astring &operator = (const astring &s);
201 //!< Sets the contents of this string to "s".
202 astring &operator = (const char *s);
203 //!< Sets the contents of this string to "s".
205 void reset() { const int last_index = end(); zap(0, last_index); }
206 //!< clears out the contents of the string.
208 void reset(special_flag way, const char *s, ...);
209 //!< operates like the constructor that takes a 'special_flag'.
211 void copy(char *to_stuff, int count) const;
212 //!< Copies a maximum of "count" characters from this into "to_stuff".
213 /*!< The target "to_stuff" is a standard C-string. The terminating zero
214 from this string is also copied. BE CAREFUL: if "count"+1 is greater than
215 the allocated length of the C-string "to_stuff", then an invalid memory
216 write will occur. keep in mind that the terminating zero will be put at
217 position "count" in the C-string if the full "count" of characters are
219 void stuff(char *to_stuff, int count) const { this->copy(to_stuff, count); }
220 //!< a synonym for copy().
222 astring operator + (const astring &s) const;
223 //!< Returns the concatenation of "this" and "s".
225 astring &operator += (const astring &s);
226 //!< Modifies "this" by concatenating "s" onto it.
228 astring &operator += (const char *s); // this is efficient.
229 //!< synonym for the concatenation operator but uses a char pointer instead.
230 astring operator + (const char *s) const { const astring suffix(s); return *this + suffix; }
231 //!< synonym for the concatenation operator but uses a char pointer instead.
232 // this method is not efficient.
234 astring &operator += (char c); //!< concatenator for single characters.
236 int find(char to_find, int position = 0, bool reverse = false) const;
237 //!< Locates "to_find" in "this".
238 /*!< find returns the index of "to_find" or "NOT_FOUND". find starts
239 looking at "position". find returns "OUT_OF_RANGE" if the position is
240 beyond the bounds of "this". */
241 int find(const astring &to_find, int posn = 0, bool reverse = false) const;
242 //!< finds "to_find" in this string.
244 int ifind(char to_find, int position = 0, bool reverse = false) const;
245 //!< like the find() methods above, but case-insensitive.
246 int ifind(const astring &to_find, int posn = 0, bool reverse = false) const;
247 //!< like the find() methods above, but case-insensitive.
249 int find_any(const char *to_find, int position = 0,
250 bool reverse = false) const;
251 //!< searches for any of the characters in "to_find".
252 /*!< the first occurrence of any of those is returned, or a negative
253 number is returned if no matches are found. */
254 int ifind_any(const char *to_find, int position = 0,
255 bool reverse = false) const;
256 //!< searches case-insensitively for any of the characters in "to_find".
257 /*!< the first occurrence of any of those is returned, or a negative number
258 is returned if none are found. */
259 int find_non_match(const char *to_find, int position = 0,
260 bool reverse = false) const;
261 //!< searches for any character that is not in "to_find" and returns index.
263 bool contains(const astring &to_find) const;
264 //!< Returns true if "to_find" is contained in this string or false if not.
266 bool substring(astring &target, int start, int end) const;
267 //!< a version that stores the substring in an existing "target" string.
269 astring substring(int start, int end) const;
270 //!< Returns the segment of "this" between the indices "start" and "end".
271 /*!< An empty string is returned if the indices are out of range. */
273 // helper methods similar to other string's choppers.
274 astring middle(int start, int count);
275 //!< returns the middle of the string from "start" with "count" characters.
276 astring left(int count);
277 //!< returns the left "count" characters from the string.
278 astring right(int count);
279 //!< returns the right "count" characters from the string.
281 void pad(int length, char padding = ' ');
282 //!< makes the string "length" characters long.
283 /*!< this string is padded with the "padding" character if the string is
284 less than that length initially. */
285 void trim(int length);
286 //!< shortens the string to "length" if it's longer than that.
288 void insert(int position, const astring &to_insert);
289 //!< Copies "to_insert" into "this" at the "position".
290 /*!< Characters at the index "position" and greater are moved over. */
291 virtual void zap(int start, int end);
292 //!< Deletes the characters between "start" and "end" inclusively.
293 /*!< C++ array conventions are used (0 through length()-1 are valid). If
294 either index is out of bounds, then the string is not modified. */
297 //!< to_lower modifies "this" by replacing capitals with lower-case.
298 /*!< every capital letter is replaced with the corresponding lower case
299 letter (i.e., A becomes a). */
301 //!< to_upper does the opposite of to_lower (that is, q becomes Q).
302 astring lower() const;
303 //!< like to_lower(), but returns a new string rather than modifying this.
304 astring upper() const;
305 //!< like to_upper(), but returns a new string rather than modifying this.
307 bool replace(const astring &tag, const astring &replacement);
308 //!< replaces the first occurrence of "tag" text with the "replacement".
309 /*!< true is returned if the "tag" was actually found and replaced. */
310 bool replace_all(char to_replace, char new_char);
311 //!< changes all occurrences of "to_replace" with "new_char".
312 bool replace_all(const astring &to_replace, const astring &new_string);
313 //!< changes all occurrences of "to_replace" into "new_string".
316 //!< resizes the string to its minimum possible length.
317 /*!< this fixes any situations where a null character has been inserted
318 into the middle of the string. the string is truncated after the first
319 null character encountered and its size is corrected. this also repairs
320 any case where the string was originally longer than it is now. */
322 enum how_to_strip { FROM_FRONT = 1, FROM_END = 2, FROM_BOTH_SIDES = 3 };
323 //!< an enumeration describing the strip operations.
325 void strip(const astring &strip_list, how_to_strip way = FROM_BOTH_SIDES);
326 //!< strips all chars from "strip_list" out of "this" given the "way".
328 void strip_spaces(how_to_strip way = FROM_BOTH_SIDES)
330 //!< removes excess space characters from string's beginning, end or both.
332 void strip_white_spaces(how_to_strip way = FROM_BOTH_SIDES)
333 { const char *const blank_chars = " \t"; strip(blank_chars, way); }
334 //!< like strip_spaces, but includes tabs in the list to strip.
336 static bool matches(const astring &match_list, char to_match);
337 //!< returns true if "to_match" is found in the "match_list" string.
339 int packed_size() const;
340 //!< Reports the size required to pack this string into a byte array.
342 void pack(byte_array &target) const;
343 //!< stores this string in the "target". it can later be unpacked again.
344 bool unpack(byte_array &source);
345 //!< retrieves a string (packed with pack()) from "source" into this string.
346 /*!< note that the string is grabbed from the array destructively; whatever
347 portion of the byte array was used to store the string will be removed from
348 the head of the array. */
350 //hmmm: rename this--it is not a simple icompare, but a strncasecmp analogue.
351 // int icompare(const astring &to_compare, int length = -1) const;
352 //!< provides a case insensitive comparison routine.
353 /*!< this uses the best methods available (that is, it uses a system
354 function if one exists). the string "to_compare" is compared with this
355 string. if the "length" is negative, then this entire string is compared
356 with the entire string "to_compare". otherwise, only "length" characters
357 from this string are compared. if this string is before "to_compare" in
358 a lexicographic ordering (basically alphabetical), then a negative number
359 is returned. if this string is after "to_compare", then a positive number
360 is returned. zero is returned if the two strings are equal for the extent
363 /// int icompare(const char *to_compare, int length = -1) const;
364 //!< a version of the above for raw character pointers.
366 /// static int slow_strncasecmp(const char *first, const char *second,
367 /// int length = -1);
368 //!< a replacement for strncasecmp on platforms without them.
369 /*!< this is slow because it cannot rely on OS methods to perform the
370 comparison. if the "length" is negative, then the entire string "first"
371 is compared to "second". otherwise just "length" characters are compared.
372 this follows the standard library strncasecmp method: the return value can
373 be in three states: negative, zero and positive. zero means the strings
374 are identical lexicographically, whereas less than zero means
375 "first" is less than "second" and greater than zero means
376 "first" is greater than "second". */
378 // play-yard for implementing base class requirements.
380 // these implement the orderable and equalizable interfaces.
381 virtual bool equal_to(const equalizable &s2) const;
//!< equality hook that accepts any equalizable object as "s2".
/*!< the definition is not in this header. NOTE(review): presumably "s2" is
expected to really be an astring; confirm how other types are handled. */
382 virtual bool less_than(const orderable &s2) const;
//!< ordering hook that accepts any orderable object as "s2".
/*!< the definition is not in this header. NOTE(review): presumably "s2" is
expected to really be an astring; confirm how other types are handled. */
// the methods below take or return base_string rather than astring. their
// definitions are not in this header, so the notes here describe signatures
// only; the behavior is presumed from the names and should be confirmed.
384 virtual base_string &concatenate_string(const base_string &s);
//!< presumably appends "s" to this string and returns this object.
385 virtual base_string &concatenate_char(char c);
//!< presumably appends the single character "c" and returns this object.
386 virtual base_string &assign(const base_string &s);
//!< presumably replaces the contents of this string with those of "s".
387 virtual base_string &upgrade(const char *s);
//!< presumably sets this string from the C-string "s"; confirm semantics.
388 virtual bool sub_string(base_string &target, int start, int end) const;
//!< takes the same arguments as substring(), with a base_string "target".
389 virtual bool sub_compare(const base_string &to_compare, int start_first,
390 int start_second, int count, bool case_sensitive) const;
//!< takes the same arguments as compare(), with a base_string "to_compare".
391 virtual void insert(int position, const base_string &to_insert);
//!< takes the same arguments as insert(), with a base_string "to_insert".
392 virtual void text_form(base_string &state_fill) const;
//!< presumably writes a text rendering of this object into "state_fill".
395 byte_array c_character_manager;
396 //!< hides the real object responsible for implementing much of the class.
398 // the real find methods.
399 int char_find(char to_find, int position, bool reverse,
400 bool case_sense) const;
// single-character search. it takes the same "to_find", "position" and
// "reverse" arguments as the public find(char) and ifind(char) methods, plus
// an explicit "case_sense" flag. NOTE(review): presumably those public
// methods forward here; the definition is not in this header.
401 // if "invert_find" is true, then non-matches are reported instead of matches.
402 int char_find_any(const astring &to_find, int position, bool reverse,
403 bool case_sense, bool invert_find = false) const;
// character-set search: "to_find" is a list of candidate characters.
// NOTE(review): presumably find_any(), ifind_any() and find_non_match() are
// implemented with this; confirm in the implementation file.
404 int str_find(const astring &to_find, int posn, bool reverse,
407 // the functions below are used in the formatting string constructor.
408 public: // only for base_sprintf.
409 astring &base_sprintf(const char *s, va_list &args);
411 char *const *c_held_string; //!< peeks into the actual pointer for debugging.
413 void seek_flag(const char *&traverser, char *flag_chars, bool &failure);
414 //!< looks for optional flag characters.
415 void seek_width(const char *&traverser, char *width_chars);
416 //!< looks for optional width characters.
417 void seek_precision(const char *&traverser, char *precision_chars);
418 //!< looks for optional precision characters.
419 void seek_modifier(const char *&traverser, char *modifier_char);
420 //!< looks for optional modifier characters.
421 void get_type_character(const char *&traverser, va_list &args,
422 astring &output_string, const char *flag_chars,
423 const char *width_chars, const char *precision_chars,
424 const char *modifier_chars);
425 /*!< the required character in a format specifier is either grabbed here or
426 the other characters are put into the output string without formatting.
427 the "X"_chars variables should have been previously gathered by the
428 seek_"X" functions. */
430 public: byte_array &get_implementation(); private: // for test programs only....
435 //! a_sprintf is a specialization of astring that provides printf style support.
436 /*! it makes it much easier to call the SPRINTF style constructor but is
437 otherwise identical to an astring. */
439 class a_sprintf : public astring
443 a_sprintf(const char *initial, ...);
444 a_sprintf(const astring &s);
449 typedef bool string_comparator_function(const astring &a, const astring &b);
450 //!< returns true if the strings "a" and "b" are considered equal.
451 /*!< this provides a prototype for the equality operation, which allows the
452 notion of equality to be redefined according to a particular function's
455 bool astring_comparator(const astring &a, const astring &b);
456 //!< implements a string comparator that just does simple astring ==.
460 void attach(byte_array &packed_form, const char *to_attach);
461 //!< Packs a character string "to_attach" into "packed_form".
462 bool detach(byte_array &packed_form, astring &to_detach);
463 //!< Unpacks a character string "to_detach" from "packed_form".