wow. that was easy: git mv core nucleus

[feisty_meow.git] / nucleus / library / configuration / ini_parser.cpp
diff --git a/nucleus/library/configuration/ini_parser.cpp b/nucleus/library/configuration/ini_parser.cpp

new file mode 100644 (file)

index 0000000..bb881c8
--- /dev/null
+++ b/nucleus/library/configuration/ini_parser.cpp
@@ -0,0 +1,237 @@
+/*****************************************************************************\
+*                                                                             *
+*  Name   : ini_parser                                                        *
+*  Author : Chris Koeritz                                                     *
+*                                                                             *
+*******************************************************************************
+* Copyright (c) 2000-$now By Author.  This program is free software; you can  *
+* redistribute it and/or modify it under the terms of the GNU General Public  *
+* License as published by the Free Software Foundation; either version 2 of   *
+* the License or (at your option) any later version.  This is online at:      *
+*     http://www.fsf.org/copyleft/gpl.html                                    *
+* Please send any updates to: fred@gruntose.com                               *
+\*****************************************************************************/
+
+#include "ini_parser.h"
+#include "table_configurator.h"
+#include "variable_tokenizer.h"
+
+#include <basis/astring.h>
+#include <basis/functions.h>
+#include <structures/amorph.h>
+#include <structures/string_array.h>
+#include <structures/string_table.h>
+#include <textual/parser_bits.h>
+
+//#define DEBUG_INI_PARSER
+  // uncomment for noisy version.
+
+#undef LOG
+#ifdef DEBUG_INI_PARSER
+  #define LOG(to_print) printf("%s\n", astring(to_print).s())
+#else
+  #define LOG(a) {}
+#endif
+
+//////////////
+
+using namespace basis;
+using namespace structures;
+using namespace textual;
+//using namespace ;
+
+namespace configuration {
+
+//algorithm:
+//  gather section until next section definition or end of file.
+//  parse the section with variable_tokenizer.
+//  eat that out of the string.
+//  repeat.
+
+ini_parser::ini_parser(const astring &to_parse, treatment_of_defaults behavior)
+: table_configurator(behavior),
+  _well_formed(false),
+  _preface(new astring)
+{
+  reset(to_parse);
+}
+
+ini_parser::~ini_parser()
+{
+  WHACK(_preface);
+}
+
+void ini_parser::chow_through_eol(astring &to_chow)
+{
+  while (to_chow.length()) {
+    if (parser_bits::is_eol(to_chow[0])) {
+      // zap all carriage return type chars now that we found one.
+      while (to_chow.length() && parser_bits::is_eol(to_chow[0])) {
+        *_preface += to_chow[0];
+        to_chow.zap(0, 0);
+      }
+      return;  // mission accomplished.
+    }
+    *_preface += to_chow[0];
+    to_chow.zap(0, 0);
+  }
+}
+
+/*
+//this is a super expensive operation...
+// it would be better to have the parser be a bit more intelligent.
+void strip_blank_lines(astring &to_strip)
+{
+  bool last_was_ret = false;
+  for (int i = 0; i < to_strip.length(); i++) {
+    if (parser_bits::is_eol(to_strip[i])) {
+      if (last_was_ret) {
+        // two in a row; now that's bogus.
+        to_strip.zap(i, i);
+        i--;  // skip back.
+        continue;
+      }
+      last_was_ret = true;
+      to_strip[i] = '\n';  // make sure we know which type to look for.
+    } else {
+      if (last_was_ret && parser_bits::white_space(to_strip[i])) {
+        // well, the last was a return but this is white space.  that's also
+        // quite bogus.
+        to_strip.zap(i, i);
+        i--;  // skip back.
+        continue;
+      }
+      last_was_ret = false;
+    }
+  }
+}
+*/
+
+void ini_parser::reset(const astring &to_parse)
+{
+  _well_formed = false;
+  table_configurator::reset();  // clean out existing contents.
+  _preface->reset();  // set the preface string back to nothing.
+  add(to_parse);
+}
+
+void ini_parser::add(const astring &to_parse)
+{
+  astring parsing = to_parse;
+//  strip_blank_lines(parsing);
+  _preface->reset();  // set the preface string back to nothing.
+  while (parsing.length()) {
+    astring section_name;
+    bool found_sect = parse_section(parsing, section_name);
+    if (!found_sect) {
+      // the line is not a section name.  toss it.
+      chow_through_eol(parsing);
+      continue;  // try to find another section name.
+    }
+    // we got a section.  yee hah.
+    int next_sect = 0;
+    for (next_sect = 0; next_sect < parsing.length(); next_sect++) {
+//      LOG(astring("[") + astring(parsing[next_sect], 1) + "]");
+      if (parser_bits::is_eol(parsing[next_sect])) {
+        // we found the requisite return; let's see if a section beginning
+        // is just after it.  we know nothing else should be, since we stripped
+        // out the blank lines and blanks after CRs.
+        if (parsing[next_sect + 1] == '[') {
+          // aha, found the bracket that should be a section start.
+          break;  // done seeking next section beginning.
+        }
+      }
+    }
+    // skip back one if we hit the end of the string.
+    if (next_sect >= parsing.length()) next_sect--;
+    // now grab what should be all values within a section.
+    LOG(a_sprintf("bounds are %d to %d, string len is %d.", 0, next_sect,
+        parsing.length()));
+    astring sect_parsing = parsing.substring(0, next_sect);
+    LOG(astring("going to parse: >>") + sect_parsing + "<<");
+    parsing.zap(0, next_sect);
+    variable_tokenizer section_reader("\n", "=");
+    section_reader.set_comment_chars(";#");
+    section_reader.parse(sect_parsing);
+    LOG(astring("read: ") + section_reader.text_form());
+    merge_section(section_name, section_reader.table());
+  }
+  _well_formed = true;
+}
+
+void ini_parser::merge_section(const astring &section_name,
+    const string_table &to_merge)
+{
+  if (!section_exists(section_name)) {
+    // didn't exist yet, so just plunk it in.
+    put_section(section_name, to_merge);
+    return;
+  }
+  
+  // since the section exists, we just write the individual entries from the
+  // new section.  they'll stamp out any old values.
+  for (int i = 0; i < to_merge.symbols(); i++)
+    put(section_name, to_merge.name(i), to_merge[i]);
+}
+
+bool ini_parser::parse_section(astring &to_parse, astring &section_name)
+{
+  section_name = "";  // reset the section.
+
+  // we have a simple state machine here...
+  enum states {
+    SEEKING_OPENING_BRACKET,  // looking for the first bracket.
+    EATING_SECTION_NAME       // got a bracket, now getting section name.
+  };
+  states state = SEEKING_OPENING_BRACKET;
+
+  // zip through the string trying to find a valid section name.
+  for (int i = 0; i < to_parse.length(); i++) {
+    char curr = to_parse[i];
+    LOG(astring("<") + astring(curr, 1) + ">");
+    switch (state) {
+      case SEEKING_OPENING_BRACKET:
+        // we're looking for the first bracket now...
+        if (parser_bits::white_space(curr)) continue;  // ignore white space.
+        if (curr != '[') return false;  // argh, bad characters before bracket.
+        state = EATING_SECTION_NAME;  // found the bracket.
+        break;
+      case EATING_SECTION_NAME:
+        // we're adding to the section name now...
+        if (curr == ']') {
+          // that's the end of the section name.
+          to_parse.zap(0, i);  // remove what we saw.
+//should we take out to end of line also?
+//eventually up to eol could be kept as a comment?
+          return true;
+        }
+        section_name += curr;  // add a character to the name.
+        break;
+      default:
+        //LOG("got to unknown case in section parser!");
+        return false;
+    }
+  }
+  // if we got to here, the section was badly formed...  the whole string was
+  // parsed through but no conclusion was reached.
+  return false;
+}
+
+bool ini_parser::restate(astring &new_ini, bool add_spaces)
+{
+  new_ini = *_preface;  // give it the initial text back again.
+  string_array sects;
+  sections(sects);
+  for (int i = 0; i < sects.length(); i++) {
+    new_ini += astring("[") + sects[i] + "]" + parser_bits::platform_eol_to_chars();
+    string_table tab;
+    if (!get_section(sects[i], tab)) continue;  // serious error.
+    tab.add_spaces(add_spaces);
+    new_ini += tab.text_form();
+  }
+  return true;
+}
+
+} //namespace.
+
+