#include <basis/byte_array.h>
#include <basis/functions.h>
+#include <textual/parser_bits.h>
#include <stdio.h>
#include <sys/stat.h>
#include <io.h>
#endif
+#undef LOG  // discard any earlier LOG definition so the printf-based version below is the one in effect.
+#define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
+
using namespace basis;
using namespace structures;
void filename::canonicalize()
{
+ FUNCDEF("canonicalize");
// turn all the non-default separators into the default.
bool found_sep = false;
for (int j = 0; j < length(); j++) {
} else saw_sep = false;
}
+#ifdef __WIN32__
+ // on windows, we want to translate away from any cygwin or msys format into a more palatable
+ // version that the rest of windows understands.
+ // first, cygwin...
+ const astring CYGDRIVE_PATH = astring(astring(DEFAULT_SEPARATOR, 1) + "cygdrive"
+ + astring(DEFAULT_SEPARATOR, 1));
+ // must be at least as long as the string we're looking for, plus a drive letter afterwards.
+ if ( (length() > CYGDRIVE_PATH.length() + 1) && begins(CYGDRIVE_PATH) ) {
+ zap(0, CYGDRIVE_PATH.length() + 1); // whack the cygdrive portion plus two slashes.
+ insert(1, ":"); // add a colon after the imputed drive letter.
+ }
+ // now we convert msys...
+ if ( (length() >= 2) && (get(0) == DEFAULT_SEPARATOR) && textual::parser_bits::is_alpha(get(1)) ) {
+ // we seem reasonably sure now that this is a windows path hiding in msys format, but
+ // the next character needs to be a slash (if there is a next character) for it to be
+ // the windows drive form. otherwise it could be /tmp, which would obviously not be
+ // intended as a windows path.
+ if ( (length() < 3) || (get(2) == DEFAULT_SEPARATOR) ) {
+ // cool, this should be interpretable as an msys path, except for those wacky types
+ // that use top-level single character directory names. we cannot help that, because
+ // we *know* msys is a choice used in other code a lot.
+//hmmm: future revision: see if the file or directory '/x' actually exists on current drive? yuck.
+ zap(0, 0); // take off initial slash.
+ insert(1, ":"); // add the obligatory colon.
+ }
+ }
+#endif
+
+LOG(astring("ha ha turned string into: ") + *this);
+
// we don't crop the last separator if the name's too small. for msdos
// names, that would be chopping a slash off the c:\ style name.
if (length() > 3) {
bool parser_bits::is_alphanumeric(const astring &look_at, int len)
{
  // forwards the string's observe() data and the length to another
  // is_alphanumeric overload, which does the actual checking.
  return is_alphanumeric(look_at.observe(), len);
}
+bool parser_bits::is_alpha(char look_at)
+{
+  // a letter is anything in the lower-case a-z range or the upper-case A-Z range.
+  if (range_check(look_at, 'a', 'z')) return true;
+  return range_check(look_at, 'A', 'Z');
+}
+
+bool parser_bits::is_alpha(const char *look_at, int len)
+{
+  // walks the first "len" characters; the first one that is not a letter
+  // ends the scan with a failure.  a "len" of zero or less yields true.
+  int index = 0;
+  while (index < len) {
+    if (!is_alpha(look_at[index])) return false;
+    index++;
+  }
+  return true;
+}
+
+bool parser_bits::is_alpha(const astring &look_at, int len)
+{
+  // hands the string's observe() data and the length to the char pointer
+  // overload, which does the actual checking.
+  return is_alpha(look_at.observe(), len);
+}
+
bool parser_bits::is_identifier(char look_at)
{
return range_check(look_at, 'a', 'z')
static bool is_alphanumeric(const basis::astring &look_at, int len);
//!< returns true if the string "look_at" is all alphanumeric characters.
+ static bool is_alpha(char look_at);
+ //!< returns true if "look_at" is one of the alphabetical characters.
+ /*!< This includes a to z in either case (lower-case a-z or upper-case A-Z). */
+ static bool is_alpha(const char *look_at, int len);
+ //!< returns true if the first "len" characters at "look_at" are all alphabetical (also true when "len" is zero or less).
+ static bool is_alpha(const basis::astring &look_at, int len);
+ //!< returns true if "len" characters of the string "look_at" are all alphabetical; defers to the char pointer version.
+
+
static bool is_numeric(char look_at);
//!< returns true if "look_at" is a valid numerical character.
/*! this allows the '-' character for negative numbers also (but only for