nucleus/tools/clam_tools/value_tagger.cpp

   1 /*****************************************************************************\
   2 *                                                                             *
   3 *  Name   : value_tagger                                                      *
   4 *  Author : Chris Koeritz                                                     *
   5 *                                                                             *
   6 *  Purpose:                                                                   *
   7 *                                                                             *
   8 *    Scoots through the entire known code base and builds a list of all the   *
   9 *  outcome (and filter) values for that tree.  A manifest of the names is     *
  10 *  produced.  Most of the behavior is driven by the ini file whose name is    *
  11 *  passed on the command line.                                                *
  12 *    Note that the set of items that can be searched for can be specified     *
  13 *  in the ini file, although they must follow the format of:                  *
  14 *      pattern(name, value, description)                                      *
  15 *  where the "pattern" is the search term and the other three items specify   *
  16 *  the enumerated value to be marked.                                         *
  17 *                                                                             *
  18 *******************************************************************************
  19 * Copyright (c) 2005-$now By Author.  This program is free software; you can  *
  20 * redistribute it and/or modify it under the terms of the GNU General Public  *
  21 * License as published by the Free Software Foundation; either version 2 of   *
  22 * the License or (at your option) any later version.  This is online at:      *
  23 *     http://www.fsf.org/copyleft/gpl.html                                    *
  24 * Please send any updates to: fred@gruntose.com                               *
  25 \*****************************************************************************/
  26
  27 #include <application/application_shell.h>
  28 #include <application/command_line.h>
  29 #include <application/hoople_main.h>
  30 #include <application/windoze_helper.h>
  31 #include <basis/environment.h>
  32 #include <basis/functions.h>
  33 #include <basis/utf_conversion.h>
  34 #include <configuration/ini_configurator.h>
  35 #include <filesystem/byte_filer.h>
  36 #include <filesystem/directory_tree.h>
  37 #include <filesystem/filename.h>
  38 #include <loggers/combo_logger.h>
  39 #include <loggers/critical_events.h>
  40 #include <loggers/program_wide_logger.h>
  41 #include <structures/set.h>
  42 #include <structures/string_array.h>
  43 #include <structures/string_table.h>
  44 #include <timely/time_stamp.h>
  45 #include <textual/parser_bits.h>
  46
  47 #include <sys/stat.h>
  48
  49 #include "../../library/algorithms/sorts.h"
  50 #ifdef __WIN32__
  51   #include <io.h>
  52 #endif
  53
  54 #undef LOG
  55 #define LOG(s) EMERGENCY_LOG(program_wide_logger::get(), astring(s))
  56
  57 using namespace algorithms;
  58 using namespace application;
  59 using namespace basis;
  60 using namespace configuration;
  61 using namespace filesystem;
  62 using namespace loggers;
  63 using namespace structures;
  64 using namespace textual;
  65 using namespace timely;
  66
  67 const int LONGEST_SEPARATION = 128;
  68   // the longest we expect a single line of text to be in definition blocks.
  69   // if the definition of an outcome or whatever is farther away than this
  70   // many characters from a comment start, we will no longer consider the
  71   // line to be commented out.  this pretty much will never happen unless it's
  72   // intentionally done to break this case.
  73
  74 const char *SKIP_VALUE_PHRASE = "SKIP_TO_VALUE";
  75   // the special phrase we use to indicate that values should jump to
  76   // a specific number.
  77
  78 ////////////////////////////////////////////////////////////////////////////
  79
  80 // this object records all the data that we gather for the defined items.
  81 class item_record
  82 {
  83 public:
  84   astring _name;
  85   int _value;
  86   astring _description;
  87   astring _path;
  88   astring _extra_tag;  //!< records special info for links.
  89
  90   item_record(const astring &name = astring::empty_string(), int value = 999,
  91       const astring &description = astring::empty_string(),
  92       const astring &path = astring::empty_string(),
  93       const astring &extra_tag = astring::empty_string())
  94   : _name(name), _value(value), _description(description), _path(path),
  95     _extra_tag(extra_tag) {}
  96 };
  97
  98 ////////////////////////////////////////////////////////////////////////////
  99
 100 class search_record
 101 {
 102 public:
 103   search_record(const astring &search = astring::empty_string(),
 104       bool is_link = false, search_record *link = NULL_POINTER)
 105   : _search(search), _no_modify(false), _is_link(is_link), _our_link(link),
 106     _current_value(0), _value_increment(1) {}
 107
 108   // these properties are available for both real or linked records.
 109   astring _search;  // our term to search for in the files.
 110   bool _no_modify;  // true if values should not be automatically incremented.
 111   astring _tag;  // extra information attached to this type.
 112
 113   bool is_link() const { return _is_link; }
 114     // returns true if this object is leeching off another object for data.
 115
 116   search_record *our_link() const { return _our_link; }
 117     // returns the object that this object is a mere shadow of.
 118
 119   symbol_table<item_record> &definitions() {
 120     if (is_link()) return _our_link->_definitions;
 121     else return _definitions;
 122   }
 123
 124   int &current_value() {
 125     if (is_link()) return _our_link->_current_value;
 126     else return _current_value;
 127   }
 128
 129   int &value_increment() {
 130     if (is_link()) return _our_link->_value_increment;
 131     else return _value_increment;
 132   }
 133
 134   int_set &out_of_band() {
 135     if (is_link()) return _our_link->_out_of_band;
 136     else return _out_of_band;
 137   }
 138
 139 private:
 140   bool _is_link;  // true if this object links to another.
 141   search_record *_our_link;  // the search we share for our values.
 142   symbol_table<item_record> _definitions;
 143     // the definitions that we found in the code.
 144   int _current_value;  // the next value to use for our term.
 145   int _value_increment;
 146     // how much to add for each new value, if this is an incrementing search.
 147   int_set _out_of_band;
 148     // values we've seen that were premature.  we always want to honor this
 149     // set, if it exists, but there will be nothing in it if the search has
 150     // completely standard non-incrementing type.  this could be varied by
 151     // a non-incrementer linking to a standard incrementer.
 152 };
 153
 154 //! a table of terms that we will search for in the code.
 155 class active_searches : public symbol_table<search_record>
 156 {};
 157
 158 ////////////////////////////////////////////////////////////////////////////
 159
 160 // this class provides us a way to easily sort our items based on value.
 161
 162 class simple_sorter {
 163 public:
 164   int _index;
 165   int _value;
 166   simple_sorter(int index = 0, int value = 0) : _index(index), _value(value) {}
 167   bool operator < (const simple_sorter &to_compare) const
 168     { return _value < to_compare._value; }
 169   bool operator > (const simple_sorter &to_compare) const
 170     { return _value > to_compare._value; }
 171   bool operator == (const simple_sorter &to_compare) const
 172     { return _value == to_compare._value; }
 173 };
 174
 175 class sorting_array : public array<simple_sorter> {};
 176
 177 ////////////////////////////////////////////////////////////////////////////
 178
 179 class value_tagger : public application_shell
 180 {
 181 public:
 182   value_tagger();
 183   virtual ~value_tagger();
 184   DEFINE_CLASS_NAME("value_tagger");
 185   int execute();
 186   int print_instructions_and_exit();
 187
 188   bool process_tree(const astring &path);
 189     // called on each directory hierarchy that we need to process.
 190
 191   bool process_file(const astring &path);
 192     // examines the file specified to see if it matches our needs.
 193
 194   bool parse_define(const astring &scanning, int indy, astring &name,
 195           int &value, astring &description, int &num_start, int &num_end);
 196     // processes the string in "scanning" to find parentheses surrounding
 197     // the "name", "value" and "description".  the "description" field may
 198     // occupy multiple lines, so all are gathered together to form one
 199     // unbroken string.  the "num_start" and "num_end" specify where the
 200     // numeric value was found, in case it needs to be patched.
 201
 202 private:
 203   ini_configurator *_ini;  // the configuration for what we'll scan.
 204   string_table _dirs;  // the list of directories.
 205   string_table _dirs_seen;  // full list of already processed directories.
 206   filename _manifest_filename;  // the name of the manifest we'll create.
 207   byte_filer _manifest;  // the actual file we're building.
 208   active_searches _search_list;  // tracks our progress in scanning files.
 209   int_array _search_ordering;
 210     // lists the terms in the order they should be applied.  initially this
 211     // carries the first pass items, but later will be reset for second pass.
 212   int_array _postponed_searches;
 213     // lists the searches that must wait until the main search is done.
 214   string_table _modified_files;  // the list of files that we touched.
 215 };
 216
 217 ////////////////////////////////////////////////////////////////////////////
 218
 219 value_tagger::value_tagger()
 220 : application_shell(),
 221   _ini(NULL_POINTER),
 222   _dirs_seen(10)
 223 {
 224 }
 225
 226 value_tagger::~value_tagger()
 227 {
 228   WHACK(_ini);
 229 }
 230
 231 int value_tagger::print_instructions_and_exit()
 232 {
 233   LOG(a_sprintf("%s usage:", filename(_global_argv[0]).basename().raw().s()));
 234   LOG("");
 235
 236   LOG("\
 237 This utility scans a code base for outcome and filter definitions.  It will\n\
 238 only scan the header files (*.h) found in the directories specified.  The\n\
 239 single parameter is expected to be an INI filename that contains the scanning\n\
 240 configuration.  The INI file should be formatted like this (where the $HOME\n\
 241 can be any variable substitution from the environment):");
 242   LOG("");
 243   LOG("\
 244 [manifest]\n\
 245 output=$HOME/manifest.txt\n\
 246 \n\
 247 [searches]\n\
 248 DEFINE_OUTCOME=1\n\
 249 DEFINE_FILTER=1\n\
 250 \n\
 251 [directories]\n\
 252 $HOME/source/lib_src/library/basis\n\
 253 $HOME/source/lib_src/library\n\
 254 $HOME/source/lib_src/communication/sockets\n\
 255 $HOME/source/lib_src/communication\n\
 256 $HOME/source/lib_src\n\
 257 $HOME/source/app_src\n\
 258 $HOME/source/test_src\n\
 259 \n\
 260 [DEFINE_OUTCOME]\n\
 261 first=0\n\
 262 increment=-1\n\
 263 \n\
 264 [DEFINE_FILTER]\n\
 265 first=-1\n\
 266 increment=1\n\
 267 no_modify=1\n\
 268 \n\
 269 [DEFINE_API_OUTCOME]\n\
 270 no_modify=1\n\
 271 link=DEFINE_OUTCOME\n\
 272 tag=API\n\
 273 \n\
 274   The \"first\" field defines the starting value that should be assigned to\n\
 275 items.\n\
 276   The \"increment\" field specifies what to add to a value for the next item.\n\
 277   The optional \"no_modify\" flag means that the values should not be auto-\n\
 278 incremented; their current value will be used.\n\
 279   The optional \"link\" field defines this type of item as using the current\n\
 280 values for another type of item.  In this case, API_OUTCOME will use the\n\
 281 values for OUTCOME to share its integer space, but API_OUTCOME is not auto-\n\
 282 incremented even though OUTCOME is.  This causes the values for OUTCOME and\n\
 283 API_OUTCOME to be checked for uniqueness together, but only OUTCOME will be\n\
 284 auto-incremented.  Note that only one level of linking is supported currently.\n\
 285   The optional \"tag\" can be used to distinguish the entries for a particular\n\
 286 search type if needed.  This is most helpful for links, so that they can be\n\
 287 distinguished from their base type.\n\
 288 \n\
 289 ");
 290
 291   return 23;
 292 }
 293
 294 astring header_string(const astring &build_number)
 295 {
 296   return a_sprintf("\
 297 #ifndef GENERATED_VALUES_MANIFEST\n\
 298 #define GENERATED_VALUES_MANIFEST\n\
 299 \n\
 300 // This file contains all outcomes and filters for this build.\n\
 301 \n\
 302 // Generated for build %s on %s\n\
 303 \n\
 304 ", build_number.s(), time_stamp::notarize(true).s());
 305 }
 306
 307 astring footer_string(const byte_array &full_config_file)
 308 {
 309   return a_sprintf("\n\
 310 // End of definitions.\n\
 311 \n\
 312 \n\
 313 // The following is the full configuration for this build:\n\
 314 \n\
 315 /*\n\
 316 \n\
 317 %s\n\
 318 */\n\
 319 \n\
 320 \n\
 321 #endif // outer guard.\n\
 322 ", (char *)full_config_file.observe());
 323 }
 324
 325 int value_tagger::execute()
 326 {
 327   FUNCDEF("execute");
 328   if (_global_argc < 2) {
 329     return print_instructions_and_exit();
 330   }
 331
 332   log(time_stamp::notarize(true) + "value_tagger started.", basis::ALWAYS_PRINT);
 333
 334   astring test_repository = environment::get("FEISTY_MEOW_APEX");
 335   if (!test_repository) {
 336     astring msg = "\
 337 There is a problem with a required build precondition.  The following\r\n\
 338 variables must be set before the build is run:\r\n\
 339 \r\n\
 340   FEISTY_MEOW_APEX    This should point at the root of the build tree.\r\n\
 341 \r\n\
 342 There are also a few variables only required for CLAM-based compilation:\r\n\
 343 \r\n\
 344   MAKEFLAGS         This should be set to \"-I $FEISTY_MEOW_APEX/clam\".\r\n\
 345 \r\n\
 346 Note that on Win32 platforms, these should be set in the System or User\r\n\
 347 variables before running a build.\r\n";
 348 #ifdef __WIN32__
 349     ::MessageBox(0, to_unicode_temp(msg),
 350         to_unicode_temp("Missing Precondition"), MB_ICONWARNING|MB_OK);
 351 #endif
 352     non_continuable_error(class_name(), func, msg);
 353   }
 354
 355   astring ini_file = _global_argv[1];  // the name of our ini file.
 356   _ini = new ini_configurator(ini_file, ini_configurator::RETURN_ONLY);
 357
 358   // read the name of the manifest file to create.
 359   _manifest_filename = filename(_ini->load("manifest", "output", ""));
 360   if (!_manifest_filename.raw().length()) {
 361     non_continuable_error(class_name(), ini_file, "The 'output' file entry is missing");
 362   }
 363   _manifest_filename = parser_bits::substitute_env_vars(_manifest_filename);
 364
 365   LOG(astring("Sending Manifest to ") + _manifest_filename);
 366   LOG("");
 367
 368   filename(_manifest_filename).unlink();
 369     // clean out the manifest ahead of time.
 370
 371   // read the list of directories to scan for code.
 372   string_table temp_dirs;
 373   bool read_dirs = _ini->get_section("directories", temp_dirs);
 374   if (!read_dirs || !temp_dirs.symbols()) {
 375     non_continuable_error(class_name(), ini_file,
 376         "The 'directories' section is missing");
 377   }
 378   for (int i = 0; i < temp_dirs.symbols(); i++) {
 379 //log(astring("curr is ") + current);
 380     filename current = filename(parser_bits::substitute_env_vars(temp_dirs.name(i)));
 381     _dirs.add(current, "");
 382   }
 383
 384   LOG(astring("Directories to scan..."));
 385   LOG(_dirs.text_form());
 386
 387   astring rdir = environment::get("FEISTY_MEOW_APEX");
 388   astring fname;
 389   astring parmfile = environment::get("BUILD_PARAMETER_FILE");
 390   if (parmfile.t()) fname = parmfile;
 391   else fname = rdir + "/build.ini";
 392
 393   // read the list of search patterns.
 394   string_table searches;
 395   bool read_searches = _ini->get_section("searches", searches);
 396   if (!read_searches || !searches.symbols()) {
 397     non_continuable_error(class_name(), ini_file,
 398         "The 'searches' section is missing");
 399   }
 400
 401   LOG("Searching for...");
 402   LOG(searches.text_form());
 403
 404   // now make sure that we get the configuration for each type of value.
 405   for (int i = 0; i < searches.symbols(); i++) {
 406     const astring &curr_name = searches.name(i);
 407
 408     search_record *check_search = _search_list.find(curr_name);
 409     if (check_search) {
 410       non_continuable_error(class_name(), ini_file,
 411           astring("section ") + curr_name + " is being defined twice");
 412     }
 413
 414     {
 415       // check for whether this section is linked to another or not.
 416       astring linked = _ini->load(curr_name, "link", "");
 417       search_record *our_link_found = NULL_POINTER;
 418       if (linked.t()) {
 419         // we found that this should be linked to another item.
 420         our_link_found = _search_list.find(linked);
 421         if (!our_link_found) {
 422           non_continuable_error(class_name(), ini_file,
 423               astring("linked section ") + curr_name + " is linked to missing "
 424                   "section " + linked);
 425         }
 426         search_record new_guy(curr_name, true, our_link_found);
 427         _search_list.add(curr_name, new_guy);
 428       } else {
 429         // this section is a stand-alone section.
 430         search_record new_guy(curr_name);
 431         _search_list.add(curr_name, new_guy);
 432       }
 433     }
 434
 435     // find our new search cabinet again so we can use it.
 436     search_record *curr_search = _search_list.find(curr_name);
 437     if (!curr_search) {
 438       non_continuable_error(class_name(), ini_file,
 439           astring("section ") + curr_name + " is missing from table "
 440               "after addition; logic error");
 441     }
 442
 443     // specify some defaults first.
 444     int start = 0;
 445     int increm = 1;
 446     if (!curr_search->is_link()) {
 447       // a linked object doesn't get to specify starting value or increment.
 448       start = _ini->load(curr_name, "first", start);
 449       curr_search->current_value() = start;
 450       increm = _ini->load(curr_name, "increment", increm);
 451       curr_search->value_increment() = increm;
 452     } else {
 453       start = curr_search->our_link()->current_value();
 454       increm = curr_search->our_link()->value_increment();
 455     }
 456
 457     int no_modify = _ini->load(curr_name, "no_modify", 0);
 458     if (no_modify) {
 459       curr_search->_no_modify = true;
 460     }
 461
 462     astring tag = _ini->load(curr_name, "tag", "");
 463     if (tag.t()) {
 464       curr_search->_tag = tag;
 465     }
 466
 467     a_sprintf to_show("%s: no_modify=%s", curr_name.s(),
 468          no_modify? "true" : "false");
 469
 470     if (curr_search->is_link()) {
 471       // links show who they're hooked to.
 472       to_show += astring(" link=") + curr_search->our_link()->_search;
 473     } else {
 474       // non-links get to show off their start value and increment.
 475       to_show += a_sprintf(" start=%d increment=%d", start, increm);
 476     }
 477     if (tag.t()) {
 478       to_show += astring(" tag=") + curr_search->_tag;
 479     }
 480     LOG(to_show);
 481   }
 482   LOG("");
 483
 484   // now gather some info about the build that we can plug into the manifest.
 485
 486   byte_filer build_file(fname, "r");
 487   if (!build_file.good()) {
 488     non_continuable_error(class_name(), build_file.name(),
 489         "Could not find the build configuration; is FEISTY_MEOW_APEX set?");
 490   }
 491   byte_array full_config;
 492   build_file.read(full_config, 100000);  // a good chance to be big enough.
 493   build_file.close();
 494
 495 //log("got config info:");
 496 //log((char *)full_config.observe());
 497
 498   astring build_number;
 499   ini_configurator temp_ini(fname, configurator::RETURN_ONLY);
 500   build_number += temp_ini.load("version", "major", "");
 501   build_number += ".";
 502   build_number += temp_ini.load("version", "minor", "");
 503   build_number += ".";
 504   build_number += temp_ini.load("version", "revision", "");
 505   build_number += ".";
 506   build_number += temp_ini.load("version", "build", "");
 507   if (build_number.equal_to("...")) {
 508     non_continuable_error(class_name(), build_file.name(),
 509         "Could not read the build number; is build parameter file malformed?");
 510   }
 511
 512 //log(astring("got build num: ") + build_number);
 513
 514   // now that we know what file to create, write the header blob for it.
 515   _manifest.open(_manifest_filename, "wb");
 516   if (!_manifest.good()) {
 517     non_continuable_error(class_name(), _manifest_filename,
 518         "Could not write to the manifest file!");
 519   }
 520   _manifest.write(header_string(build_number));
 521
 522   // make sure we have the right ordering for our terms.  items that are
 523   // non-modify types must come before the modifying types.
 524   for (int i = 0; i < _search_list.symbols(); i++) {
 525     search_record &curr_reco = _search_list[i];
 526     if (curr_reco._no_modify)
 527       _search_ordering += i;
 528     else
 529       _postponed_searches += i;
 530   }
 531
 532   // scan across each directory specified for our first pass.
 533   LOG("First pass...");
 534   for (int i = 0; i < _dirs.symbols(); i++) {
 535     if (_dirs.name(i).begins("#") || _dirs.name(i).begins(";")) continue;  // skip comment.
 536     LOG(astring("  Processing: ") + _dirs.name(i));
 537     bool ret = process_tree(_dirs.name(i));
 538     if (!ret) {
 539       LOG(astring("Problem encountered in directory ") + _dirs.name(i));
 540     }
 541   }
 542   LOG("");
 543
 544   // second pass now.
 545   LOG("Second pass...");
 546   _search_ordering = _postponed_searches;  // recharge the list for 2nd pass.
 547   _dirs_seen.reset();  // drop any directories we saw before.
 548   for (int i = 0; i < _dirs.symbols(); i++) {
 549     if (_dirs.name(i).begins("#") || _dirs.name(i).begins(";")) continue;  // skip comment.
 550     LOG(astring("  Processing: ") + _dirs.name(i));
 551     bool ret = process_tree(_dirs.name(i));
 552     if (!ret) {
 553       LOG(astring("Problem encountered in directory ") + _dirs.name(i));
 554     }
 555   }
 556   LOG("");
 557
 558   const astring quote = "\"";
 559   const astring comma = ",";
 560
 561   // scoot across all the completed searches and dump results.
 562   for (int i = 0; i < _search_list.symbols(); i++) {
 563     search_record &curr_reco = _search_list[i];
 564     const astring &pattern = curr_reco._search;
 565
 566     _manifest.write(astring("/* START ") + pattern + "\n");
 567     _manifest.write(astring("[") + pattern + "]\n");
 568
 569     if (!curr_reco.is_link()) {
 570       // scoot across all definitions and print them out.
 571
 572       // do the print out in order, as dictated by the sign of the increment.
 573       sorting_array sortie;
 574       for (int j = 0; j < curr_reco.definitions().symbols(); j++) {
 575         const item_record &rec = curr_reco.definitions().get(j);
 576         sortie += simple_sorter(j, rec._value);
 577       }
 578       shell_sort(sortie.access(), sortie.length(),
 579           negative(curr_reco.value_increment()));
 580
 581       for (int j = 0; j < sortie.length(); j++) {
 582         int indy = sortie[j]._index;
 583         const item_record &rec = curr_reco.definitions().get(indy);
 584         astring to_write = "  ";
 585         if (rec._extra_tag.t()) {
 586           to_write += astring("(") + rec._extra_tag + ") ";
 587         }
 588         to_write += quote + rec._name + quote + comma + " ";
 589         to_write += quote + a_sprintf("%d", rec._value) + quote + comma + " ";
 590         to_write += quote + rec._description + quote + comma + " ";
 591         to_write += quote + rec._path + quote;
 592         to_write += "\n";
 593         _manifest.write(to_write);
 594       }
 595     } else {
 596       // this is just a link.
 597       astring to_write = "  Linked to search item ";
 598       to_write += curr_reco.our_link()->_search;
 599       to_write += "\n";
 600       _manifest.write(to_write);
 601     }
 602
 603     _manifest.write(astring("END ") + pattern + " */\n\n");
 604   }
 605
 606   _manifest.write(footer_string(full_config));
 607
 608   // show all the modified files.
 609   if (_modified_files.symbols()) {
 610     const int syms = _modified_files.symbols();
 611     LOG("Modified Files:");
 612     LOG("===============");
 613     for (int i = 0; i < syms; i++) {
 614       LOG(_modified_files.name(i));
 615     }
 616   } else {
 617     LOG("No files needed modification for generated values.");
 618   }
 619   LOG("");
 620
 621   log(time_stamp::notarize(true) + "value_tagger finished.", ALWAYS_PRINT);
 622
 623   return 0;
 624 }
 625
 626 #define INBO (indy < scanning.length())
 627   // a macro that makes length checking less verbose.
 628
 629 // make sure we drop any spaces in between important bits.
 630 #define SKIP_SPACES \
 631   while (INBO && parser_bits::white_space(scanning[indy])) indy++;
 632
 633 // return with a failure but say why it happened.
 634 #define FAIL_PARSE(why) { \
 635   log(astring("failed to parse the string because ") + why + ".", ALWAYS_PRINT); \
 636   return false; \
 637 }
 638
 639 bool value_tagger::parse_define(const astring &scanning, int indy,
 640     astring &name, int &value, astring &description, int &num_start,
 641     int &num_end)
 642 {
 643   // prepare our result objects.
 644   name = ""; value = -1; description = ""; num_start = -1; num_end = -1;
 645
 646   SKIP_SPACES;
 647
 648   // look for starting parenthesis.
 649   if (!INBO || (scanning[indy] != '(') )
 650     FAIL_PARSE("the first parenthesis is missing");
 651
 652   indy++;  // skip paren.
 653   SKIP_SPACES;
 654
 655   // find the name of the item being defined.
 656   while (INBO && (scanning[indy] != ',') ) {
 657     name += scanning[indy];
 658     indy++;
 659   }
 660
 661   indy++;  // skip the comma.
 662   SKIP_SPACES;
 663
 664   astring num_string;
 665   num_start = indy;
 666   while (INBO && parser_bits::is_numeric(scanning[indy])) {
 667     num_string += scanning[indy];
 668     indy++;
 669   }
 670   num_end = indy - 1;
 671   value = num_string.convert(0);
 672
 673   SKIP_SPACES;
 674
 675   if (!INBO || (scanning[indy] != ',') )
 676     FAIL_PARSE("the post-value comma is missing");
 677
 678   indy++;
 679   SKIP_SPACES;
 680
 681   if (!INBO || (scanning[indy] != '"') )
 682     FAIL_PARSE("the opening quote for the description is missing");
 683
 684   indy++;  // now we should be at raw text.
 685
 686   // scan through the full description, taking into account that it might
 687   // be broken across multiple lines as several quoted bits.
 688   bool in_quote = true;  // we're inside a quote now.
 689   while (INBO && (scanning[indy] != ')') ) {
 690     const char curr = scanning[indy];
 691 //hmmm: escaped quotes are not currently handled.
 692     if (curr == '"') in_quote = !in_quote;  // switch quoting state.
 693     else if (in_quote) description += curr;
 694     indy++;
 695   }
 696
 697   return scanning[indy] == ')';
 698 }
 699
 700 bool value_tagger::process_file(const astring &path)
 701 {
 702   byte_filer examining(path, "rb");
 703   if (!examining.good()) {
 704     log(astring("Error reading file: ") + path, ALWAYS_PRINT);
 705     return false;
 706   }
 707   examining.seek(0, byte_filer::FROM_END);
 708   int fsize = int(examining.tell());
 709   examining.seek(0, byte_filer::FROM_START);
 710
 711   astring contents('\0', fsize + 20);
 712   int bytes_read = examining.read((abyte *)contents.access(), fsize);
 713     // read the file directly into a big astring.
 714   examining.close();
 715   contents[bytes_read] = '\0';
 716   contents.shrink();  // drop any extra stuff at end.
 717
 718   bool modified = false;  // set to true if we need to write the file back.
 719
 720   // check if the file matches our phrases of interest.
 721   bool matched = false;
 722   for (int q = 0; q < _search_list.symbols(); q++) {
 723     search_record &curr_reco = _search_list[q];
 724     if (contents.contains(curr_reco._search)) {
 725 //_manifest.write(astring("MATCH-") + curr_pattern + ": " + path + "\n" ); //temp
 726       matched = true;
 727       break;
 728     }
 729   }
 730
 731   if (!matched) return true;
 732
 733   // now we have verified that there's something interesting in this file.
 734   // go through to find the interesting bits.
 735
 736   // we do this in the search ordering that we established earlier, so we
 737   // will tag the values in the proper order.
 738   for (int x = 0; x < _search_ordering.length(); x++) {
 739     int q = _search_ordering[x];  // get our real index.
 740     search_record &curr_reco = _search_list[q];
 741     const astring &curr_pattern = curr_reco._search;
 742 ///log(astring("now seeking ") + curr_pattern);
 743     int start_from = 0;  // where searches will start from.
 744
 745     while (true) {
 746       // search forward for next match.
 747       int indy = contents.find(curr_pattern, start_from);
 748       if (negative(indy)) break;  // no more matches.
 749       start_from = indy + 5;  // ensure we'll skip past the last match.
 750
 751       // make sure our deadly pattern isn't in front; we don't want to
 752       // process the actual definition of the macro in question.
 753 //log(a_sprintf("indy=%d [indy-1]=%c [indy-2]=%c", indy, contents[indy-1], contents[indy-2]));
 754       if ( (indy > 3) && (contents[indy-1] == ' ')
 755           && (contents[indy-2] == 'e') ) {
 756         int def_indy = contents.find("#define", indy, true);
 757 //log(astring("checking ") + curr_pattern + a_sprintf(": defindy %d, ", def_indy) + path + "\n" );
 758
 759         if (non_negative(def_indy) && (absolute_value(indy - def_indy) < 12) ) {
 760           // they're close enough that we probably need to skip this
 761           // occurrence of our search term.
 762 //_manifest.write(astring("DEMATCH-") + curr_pattern + ": had the #define! " + path + "\n" );
 763           continue;
 764         }
 765       }
 766
 767       // make sure we don't include commented lines in consideration.
 768       int comm_indy = contents.find("//", indy, true);
 769       if (non_negative(comm_indy)) {
 770 //log("found a comment marker");
 771         // we found a comment before the definition, but we're not sure how
 772         // far before.
 773         if (absolute_value(comm_indy - indy) < LONGEST_SEPARATION) {
 774 //log("comment is close enough...");
 775           // they could be on the same line...  unless lines are longer than
 776           // our constant.
 777           bool found_cr = false;
 778           for (int q = comm_indy; q < indy; q++) {
 779             if (parser_bits::is_eol(contents[q])) {
 780               found_cr = true;
 781               break;
 782             }
 783           }
 784           if (!found_cr) {
 785             // if there's a comment before the definition and no carriage
 786             // returns in between, then this is just a comment.
 787 //log(astring("DEMATCH-") + curr_pattern + ": had the comment! " + path + "\n" );
 788             continue;
 789           }
 790         }
 791       }
 792
 793       // now we are pretty sure this is a righteous definition of an outcome,
 794       // and not the definition of the macro itself.
 795       int value, num_start, num_end;
 796       astring name, description;
 797       bool found_it = parse_define(contents, indy + curr_pattern.length(),
 798           name, value, description, num_start, num_end);
 799       if (!found_it) {
 800         log(astring("there was a problem parsing ") + curr_pattern + " in " + path, ALWAYS_PRINT);
 801         continue;
 802       }
 803
 804       // handle the special keyword for changing the value.  this is useful
 805       // if you want a set of outcomes to start at a specific range.
 806       if (name.equal_to(SKIP_VALUE_PHRASE)) {
 807         LOG(astring("\tSkipping value for ") + curr_pattern
 808             + a_sprintf(" to %d because of request in\n\t", value) + path);
 809         curr_reco.current_value() = value;
 810       }
 811       while (true) {
 812         // make sure that the current value is not already in use.
 813         if (!curr_reco.out_of_band().member(curr_reco.current_value()))
 814           break;
 815         // if we had a match above, we need to adjust the current value.
 816         curr_reco.current_value() += curr_reco.value_increment();
 817       }
 818       if (name.equal_to(SKIP_VALUE_PHRASE)) {
 819         continue;  // keep going now that we vetted the current value.
 820       }
 821
 822 //must catch some conditions here for values:
 823 //  for incrementing types, we can always just try to use the next value
 824 //  once we know it wasn't already defined out of band?
 825 //  for non-incrementing types, we need to ensure we haven't already seen
 826 //  the thing.  do we just always add a value seen to out of band?
 827 //  for mixed types, the incrementing side needs to not reuse out of band
 828 //  values.
 829
 830       astring other_place;  // the other place it was defined.
 831       if (curr_reco.out_of_band().member(value) && curr_reco._no_modify) {
 832         // this is bad; we have already seen this value elsewhere...
 833         for (int x = 0; x < curr_reco.definitions().symbols(); x++) {
 834           // see if we can find the previous definition in our list.
 835           if (value == curr_reco.definitions()[x]._value)
 836             other_place = curr_reco.definitions()[x]._path;
 837         }
 838         non_continuable_error(class_name(), path,
 839             a_sprintf("There is a duplicate value here for %s=%d !  "
 840                 "Also defined in %s.", name.s(), value, other_place.s()));
 841       }
 842
 843       // we care sometimes that this value is different than the next
 844       // sequential one we'd assign.  if it's a non-modifying type of
 845       // search, then we can't change the assigned value anyway--we can
 846       // only report the error in re-using a value (above).
 847       if (!curr_reco._no_modify) {
 848         // check that the defined value matches the next one we'd assign.
 849         if (value != curr_reco.current_value()) {
 850           // patch the value with the appropriate one we've been tracking.
 851           modified = true;
 852           value = curr_reco.current_value();
 853           contents.zap(num_start, num_end);  // remove old fusty value.
 854           contents.insert(num_start, a_sprintf("%d", value));
 855           _modified_files.add(path, "");
 856         }
 857         // move the current value up (or down).
 858         curr_reco.current_value() += curr_reco.value_increment();
 859       } else {
 860         // non-modifying type of value here.
 861 //anything to do?
 862       }
 863
 864       curr_reco.out_of_band() += value;
 865         // we've vetted the value, and now we're definitely using it.
 866
 867       // make sure they aren't trying to reuse the name for this item.
 868       item_record rec;
 869       bool found_name = false;  // we don't want to find name already there.
 870       if (curr_reco.definitions().find(name)) {
 871         rec = *curr_reco.definitions().find(name);
 872         found_name = true;
 873       }
 874       if (found_name) {
 875         // this is bad.  this means we are not unique.  remove the manifest
 876         // file due to this error.
 877         _manifest.close();  // close the file since we want to whack it.
 878         filename(_manifest_filename).unlink();
 879         non_continuable_error(class_name(), path,
 880             a_sprintf("There is a duplicate name here (%s)!  "
 881                 "Also defined in %s.", name.s(), rec._path.s()));
 882       }
 883
 884       // record the definition in the appropriate table.
 885       curr_reco.definitions().add(name, item_record(name, value,
 886           description, path, curr_reco._tag));
 887
 888 //log(curr_pattern + a_sprintf(": name=%s value=%d desc=[%s]\n", name.s(), value, description.s()));
 889
 890     }
 891   }
 892
 893   if (modified) {
 894     // rewrite the file, since we modified its contents.
 895     bool chmod_result = filename(path).chmod(filename::ALLOW_BOTH,
 896         filename::USER_RIGHTS);
 897 /*
 898     int chmod_value;
 899 #ifdef __UNIX__
 900     chmod_value = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 901 #elif defined(__WIN32__)
 902     chmod_value = _S_IREAD | _S_IWRITE;
 903 #else
 904     //unknown.  let's try unix...
 905     chmod_value = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
 906 #endif
 907     int chmod_result = chmod(path.s(), chmod_value);
 908 */
 909     if (!chmod_result) {
 910       log(astring("there was a problem changing permissions on ") + path
 911           + "; writing the new version might fail.", ALWAYS_PRINT);
 912     }
 913
 914     byte_filer rewriting(path, "wb");
 915     rewriting.write(contents);
 916     rewriting.close();
 917   }
 918
 919   return true;
 920 }
 921
 922 bool value_tagger::process_tree(const astring &path)
 923 {
 924   directory_tree dir(path, "*.h");
 925   if (!dir.good()) return false;
 926
 927   dir_tree_iterator *ted = dir.start(directory_tree::prefix);
 928     // create our iterator to perform a prefix traversal.
 929
 930   filename curr_dir;  // the current path the iterator is at.
 931   string_array files;  // the filenames held at the iterator.
 932
 933   while (directory_tree::current(*ted, curr_dir, files)) {
 934     // we have a good directory to process.
 935
 936     // omit any subdirectories that exactly match directories we've already
 937     // scanned.  necessary to avoid redoing whole areas.
 938     if (!_dirs_seen.find(curr_dir)) {
 939       // deal with each matching header file we've found.
 940       for (int i = 0; i < files.length(); i++) {
 941         bool file_ret = process_file(filename(curr_dir.raw(), files[i]));
 942         if (!file_ret) {
 943           log(astring("There was an error while processing ") + files[i], ALWAYS_PRINT);
 944         }
 945       }
 946
 947       _dirs_seen.add(curr_dir, "");
 948     }
 949
 950     // go to the next place.
 951     directory_tree::next(*ted);
 952   }
 953
 954   directory_tree::throw_out(ted);
 955   return true;
 956 }
 957
 958 HOOPLE_MAIN(value_tagger, )
       // NOTE(review): HOOPLE_MAIN is defined outside this file.  presumably
       // it expands to the program's main() function, which creates and runs
       // a value_tagger object, and the empty second argument supplies no
       // extra startup code--confirm against the macro's definition.
 959
 960 #ifdef __BUILD_STATIC_APPLICATION__
 961   // static dependencies found by buildor_gen_deps.sh:
 962   #include <application/application_shell.cpp>
 963   #include <application/command_line.cpp>
 964   #include <application/windoze_helper.cpp>
 965   #include <basis/astring.cpp>
 966   #include <basis/common_outcomes.cpp>
 967   #include <basis/environment.cpp>
 968   #include <basis/guards.cpp>
 969   #include <basis/mutex.cpp>
 970   #include <basis/utf_conversion.cpp>
 971   #include <configuration/application_configuration.cpp>
 972   #include <configuration/configurator.cpp>
 973   #include <configuration/ini_configurator.cpp>
 974   #include <configuration/ini_parser.cpp>
 975   #include <configuration/table_configurator.cpp>
 976   #include <configuration/variable_tokenizer.cpp>
 977   #include <filesystem/byte_filer.cpp>
 978   #include <filesystem/directory.cpp>
 979   #include <filesystem/directory_tree.cpp>
 980   #include <filesystem/file_info.cpp>
 981   #include <filesystem/file_time.cpp>
 982   #include <filesystem/filename.cpp>
 983   #include <filesystem/filename_list.cpp>
 984   #include <filesystem/filename_tree.cpp>
 985   #include <filesystem/huge_file.cpp>
 986   #include <loggers/combo_logger.cpp>
 987   #include <loggers/console_logger.cpp>
 988   #include <loggers/critical_events.cpp>
 989   #include <loggers/file_logger.cpp>
 990   #include <loggers/program_wide_logger.cpp>
 991   #include <nodes/node.cpp>
 992   #include <nodes/packable_tree.cpp>
 993   #include <nodes/path.cpp>
 994   #include <nodes/tree.cpp>
 995   #include <structures/bit_vector.cpp>
 996   #include <structures/checksums.cpp>
 997   #include <structures/object_packers.cpp>
 998   #include <structures/static_memory_gremlin.cpp>
 999   #include <structures/string_hasher.cpp>
1000   #include <structures/string_table.cpp>
1001   #include <structures/version_record.cpp>
1002   #include <textual/byte_formatter.cpp>
1003   #include <textual/parser_bits.cpp>
1004   #include <textual/string_manipulation.cpp>
1005   #include <timely/earth_time.cpp>
1006   #include <timely/time_stamp.cpp>
1007 #endif // __BUILD_STATIC_APPLICATION__
1008