From 293f927b59ff6a34067e7cf37c00d1a4291095a1 Mon Sep 17 00:00:00 2001
From: Chris Koeritz
Date: Sat, 7 Apr 2012 11:47:14 -0400
Subject: [PATCH] added support to strip out cygdrive and msys paths on
 windows. not sure how consistent the rest of the codebase is about using
 filename object where it should or could. will have to patch through making
 one-off code start using it. parser bits got a simple new set of functions
 for testing just alphabetical characters. probably a lot of places using
 their own code for this, if we didn't have that characterized before (just
 had alphanumeric and numeric tests, as well as identifier, which is for
 variables and not regular words).

---
 nucleus/library/filesystem/filename.cpp | 35 +++++++++++++++++++++++++
 nucleus/library/filesystem/filename.h   |  3 +--
 nucleus/library/textual/parser_bits.cpp | 13 +++++++++
 nucleus/library/textual/parser_bits.h   |  8 ++++++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/nucleus/library/filesystem/filename.cpp b/nucleus/library/filesystem/filename.cpp
index ef148ea8..6c87f99d 100644
--- a/nucleus/library/filesystem/filename.cpp
+++ b/nucleus/library/filesystem/filename.cpp
@@ -19,6 +19,7 @@
 
 #include
 #include
+#include
 
 #include
 #include
@@ -30,6 +31,9 @@
   #include
 #endif
 
+#undef LOG
+#define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
+
 using namespace basis;
 using namespace structures;
 
@@ -146,6 +150,7 @@ void filename::push(const astring &to_push)
 
 void filename::canonicalize()
 {
+  FUNCDEF("canonicalize");
   // turn all the non-default separators into the default.
   bool found_sep = false;
   for (int j = 0; j < length(); j++) {
@@ -176,6 +181,36 @@ void filename::canonicalize()
     } else saw_sep = false;
   }
 
+#ifdef __WIN32__
+  // on windows, we want to translate away from any cygwin or msys format into a more palatable
+  // version that the rest of windows understands.
+  // first, cygwin...
+  const astring CYGDRIVE_PATH = astring(astring(DEFAULT_SEPARATOR, 1) + "cygdrive"
+      + astring(DEFAULT_SEPARATOR, 1));
+  // must be at least as long as the string we're looking for, plus a drive letter afterwards.
+  if ( (length() >= CYGDRIVE_PATH.length() + 1) && begins(CYGDRIVE_PATH) ) {
+    zap(0, CYGDRIVE_PATH.length() - 1); // whack only "cygdrive" plus its two slashes; zap's end index is inclusive, so the drive letter stays.
+    insert(1, ":"); // add a colon after the imputed drive letter.
+  }
+  // now we convert msys...
+  if ( (length() >= 2) && (get(0) == DEFAULT_SEPARATOR) && textual::parser_bits::is_alpha(get(1)) ) {
+    // we seem reasonably sure now that this is a windows path hiding in msys format, but
+    // the next character needs to be a slash (if there is a next character) for it to be
+    // the windows drive form. otherwise it could be /tmp, which would obviously not be
+    // intended as a windows path.
+    if ( (length() < 3) || (get(2) == DEFAULT_SEPARATOR) ) {
+      // cool, this should be interpretable as an msys path, except for those wacky types
+      // that use top-level single character directory names. we cannot help that, because
+      // we *know* msys is a choice used in other code a lot.
+//hmmm: future revision: see if the file or directory '/x' actually exists on current drive? yuck.
+      zap(0, 0); // take off initial slash.
+      insert(1, ":"); // add the obligatory colon.
+    }
+  }
+#endif
+
+LOG(astring("ha ha turned string into: ") + *this);
+
   // we don't crop the last separator if the name's too small. for msdos
   // names, that would be chopping a slash off the c:\ style name.
   if (length() > 3) {
diff --git a/nucleus/library/filesystem/filename.h b/nucleus/library/filesystem/filename.h
index a6018ef5..1f2c7d3b 100644
--- a/nucleus/library/filesystem/filename.h
+++ b/nucleus/library/filesystem/filename.h
@@ -23,8 +23,7 @@
 // forward declarations.
 class status_info;
 
-//hmmm: this doesn't really belong here, does it.
-
+//hmmm: this doesn't really belong here, does it...
 // define useful constant for filesystem path length.
 #ifndef MAX_ABS_PATH
 #ifdef __WIN32__
diff --git a/nucleus/library/textual/parser_bits.cpp b/nucleus/library/textual/parser_bits.cpp
index 0bf33179..702db612 100644
--- a/nucleus/library/textual/parser_bits.cpp
+++ b/nucleus/library/textual/parser_bits.cpp
@@ -135,6 +135,19 @@ bool parser_bits::is_alphanumeric(const char *look_at, int len)
 bool parser_bits::is_alphanumeric(const astring &look_at, int len)
 { return is_alphanumeric(look_at.observe(), len); }
 
+bool parser_bits::is_alpha(char look_at)
+{ return range_check(look_at, 'a', 'z') || range_check(look_at, 'A', 'Z'); }
+
+bool parser_bits::is_alpha(const char *look_at, int len)
+{
+  for (int i = 0; i < len; i++)
+    if (!is_alpha(look_at[i])) return false;
+  return true;
+}
+
+bool parser_bits::is_alpha(const astring &look_at, int len)
+{ return is_alpha(look_at.observe(), len); }
+
 bool parser_bits::is_identifier(char look_at)
 {
   return range_check(look_at, 'a', 'z')
diff --git a/nucleus/library/textual/parser_bits.h b/nucleus/library/textual/parser_bits.h
index 6ca2a21d..e697654a 100644
--- a/nucleus/library/textual/parser_bits.h
+++ b/nucleus/library/textual/parser_bits.h
@@ -90,6 +90,14 @@ public:
   static bool is_alphanumeric(const basis::astring &look_at, int len);
     //!< returns true if the string "look_at" is all alphanumeric characters.
 
+  static bool is_alpha(char look_at);
+    //!< returns true if "look_at" is one of the alphabetical characters.
+    /*!< This includes a to z in either case. */
+  static bool is_alpha(const char *look_at, int len);
+    //!< returns true if the char ptr "look_at" is all alphabetical characters.
+  static bool is_alpha(const basis::astring &look_at, int len);
+    //!< returns true if the string "look_at" is all alphabetical characters.
+
   static bool is_numeric(char look_at);
     //!< returns true if "look_at" is a valid numerical character.
     /*! this allows the '-' character for negative numbers also (but only for
-- 
2.34.1