feisty meow concerns codebase  2.140
marks_sorter.cpp
Go to the documentation of this file.
1 /*****************************************************************************\
2 * *
3 * Name : marks_sorter *
4 * Author : Chris Koeritz *
5 * *
6 * Purpose: *
7 * *
8 * Processes a link database in HOOPLE format and generates a new database *
9 * that is sorted and always uses category nicknames where defined. *
10 * *
11 *******************************************************************************
12 * Copyright (c) 2006-$now By Author. This program is free software; you can *
13 * redistribute it and/or modify it under the terms of the GNU General Public *
14 * License as published by the Free Software Foundation; either version 2 of *
15 * the License or (at your option) any later version. This is online at: *
16 * http://www.fsf.org/copyleft/gpl.html *
17 * Please send any updates to: fred@gruntose.com *
18 \*****************************************************************************/
19 
20 #include "bookmark_tree.h"
21 
23 #include <basis/functions.h>
24 #include <basis/guards.h>
25 #include <basis/astring.h>
26 #include <filesystem/byte_filer.h>
27 #include <filesystem/filename.h>
28 #include <loggers/combo_logger.h>
31 #include <textual/list_parsing.h>
32 #include <textual/parser_bits.h>
33 
34 using namespace application;
35 using namespace basis;
36 using namespace filesystem;
37 using namespace loggers;
38 using namespace nodes;
39 using namespace structures;
40 using namespace textual;
41 
42 //#define DEBUG_MARKS
43  // uncomment to have more debugging noise.
44 
45 #undef BASE_LOG
46 #define BASE_LOG(s) program_wide_logger::get().log(s, ALWAYS_PRINT)
47 #undef LOG
48 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger::get(), \
49  a_sprintf("line %d: ", _categories._line_number) + s)
50 
51 const int MAX_FILE_SIZE = 4 * MEGABYTE;
52  // the largest file we'll read.
53 
55 
56 class marks_sorter : public application_shell
57 {
58 public:
59  marks_sorter()
60  : application_shell(), _loader_count(0), _link_spool(0) {}
61  DEFINE_CLASS_NAME("marks_sorter");
62  virtual int execute();
63  int print_instructions(const filename &program_name);
64 
65  int write_new_marks(const astring &output_filename);
66  // given a tree of links, this writes out a new sorted file to the
67  // "output_filename".
68 
69 private:
70  bookmark_tree _categories; // our tree of categories.
71  int _loader_count; // count of the loader functions.
72  int _link_spool; // count of which link we're writing.
73 };
74 
76 
77 int marks_sorter::print_instructions(const filename &program_name)
78 {
79  a_sprintf to_show("%s:\n\
80 This program needs two filenames as command-line parameters. The -i flag\n\
81 is used to specify the input filename, which is expected to be in the HOOPLE\n\
82 link database format. The -o flag specifies the new bookmarks file to be\n\
83 created, which will also be in the HOOPLE link format.\n\
84 The HOOPLE link format is documented here:\n\
85  http://feistymeow.org/guides/link_database/format_manifesto.txt\n\
86 ", program_name.basename().raw().s(), program_name.basename().raw().s());
87  program_wide_logger::get().log(to_show, ALWAYS_PRINT);
88  return 12;
89 }
90 
91 int marks_sorter::execute()
92 {
93  FUNCDEF("execute");
95 
96  command_line cmds(_global_argc, _global_argv); // process the command line parameters.
97  astring input_filename; // we'll store our link database name here.
98  astring output_filename; // where the web page we're creating goes.
99  if (!cmds.get_value('i', input_filename, false))
100  return print_instructions(cmds.program_name());
101  if (!cmds.get_value('o', output_filename, false))
102  return print_instructions(cmds.program_name());
103 
104  BASE_LOG(astring("input file: ") + input_filename);
105  BASE_LOG(astring("output file: ") + output_filename);
106 
107  filename outname(output_filename);
108  if (outname.exists()) {
109  non_continuable_error(class_name(), func, astring("the output file ")
110  + output_filename + " already exists. It would be over-written if "
111  "we continued.");
112  }
113 
114  int ret = _categories.read_csv_file(input_filename);
115  if (ret) return ret;
116 
117  ret = write_new_marks(output_filename);
118  if (ret) return ret;
119 
120  return 0;
121 }
122 
123 int marks_sorter::write_new_marks(const astring &output_filename)
124 {
125  FUNCDEF("write_new_marks");
126  // open the output file for streaming out the new marks file.
127  filename outname(output_filename);
128  byte_filer output_file(output_filename, "w");
129  if (!output_file.good())
130  non_continuable_error(class_name(), func, "the output file could not be opened");
131 
132  bool just_had_return = false; // did we just see a carriage return?
133  bool first_line = true; // is this the first line to be emitted?
134 
135  // traverse the tree in prefix order.
136  tree::iterator itty = _categories.access_root().start(tree::prefix);
137  tree *curr = NULL_POINTER; // the current node.
138 
139  while ( (curr = itty.next()) ) {
140  inner_mark_tree *nod = (inner_mark_tree *)curr;
141  // set up a category printout for this node.
142  string_array cat_list;
143  cat_list += "C";
144  cat_list += nod->name();
145  inner_mark_tree *pare = (inner_mark_tree *)nod->parent();
146  if (pare) {
147  astring name_split, nick_split;
148  _categories.break_name(pare->name(), name_split, nick_split);
149  if (!nick_split) cat_list += name_split;
150  else cat_list += nick_split;
151  } else {
152  cat_list += "";
153  }
154 
155  // create a text line to send to the output file.
156  astring tmp;
157  list_parsing::create_csv_line(cat_list, tmp);
158  tmp += "\n";
159  if (!just_had_return && !first_line) {
160  // generate a blank line before the category name.
161  output_file.write(parser_bits::platform_eol_to_chars());
162  }
163 
164  // reset the flags after we've checked them.
165  just_had_return = false;
166  first_line = false;
167 
168  output_file.write(tmp);
169  // write the actual category definition.
170 
171  // print the links for all of the ones stored at this node.
172  for (int i = 0; i < nod->_links.elements(); i++) {
173  link_record *lin = nod->_links.borrow(i);
174  if (!lin->_url) {
175  // just a comment.
176  astring descrip = lin->_description;
177  if (descrip.contains("http:")) {
178  // we'll clean the html formatting out that we added earlier.
179  int indy = descrip.find('"');
180  if (non_negative(indy)) {
181  descrip.zap(0, indy);
182  indy = descrip.find('"');
183  if (non_negative(indy)) descrip.zap(indy, descrip.end());
184  }
185  descrip = astring(" ") + descrip;
186  // add a little spacing.
187  }
188  if (descrip.t()) {
189  output_file.write(astring("#") + descrip + "\n");
190  just_had_return = false;
191  } else {
192  // this line's totally blank, so we'll generate a blank line.
193  // we don't want to put in more than one blank though, so we check
194  // whether we did this recently.
195  if (!just_had_return) {
196  output_file.write(parser_bits::platform_eol_to_chars());
197  just_had_return = true; // set our flag for a carriage return.
198  }
199  }
200  } else {
201  // should be a real link.
202  string_array lnks;
203  lnks += "L";
204  lnks += lin->_description;
205  // use just the nickname for the parent, if there is a nick.
206  astring name_split;
207  astring nick_split;
208  _categories.break_name(nod->name(), name_split, nick_split);
209  if (!nick_split) lnks += nod->name();
210  else lnks += nick_split;
211  lnks += lin->_url;
212  list_parsing::create_csv_line(lnks, tmp);
213  tmp += "\n";
214  output_file.write(tmp);
215  just_had_return = false;
216  }
217  }
218  }
219 
220  output_file.close();
221 
222  BASE_LOG(a_sprintf("wrote %d links in %d categories.",
223  _categories.link_count(), _categories.category_count()));
224  BASE_LOG(astring());
225 
226  return 0;
227 }
228 
230 
231 HOOPLE_MAIN(marks_sorter, )
232 
int print_instructions(bool good, const astring &program_name)
Definition: checker.cpp:45
The application_shell is a base object for console programs.
a_sprintf is a specialization of astring that provides printf style support.
Definition: astring.h:440
Provides a dynamically resizable ASCII character string.
Definition: astring.h:35
const char * s() const
synonym for observe. the 's' stands for "string", if that helps.
Definition: astring.h:113
bool t() const
t() is a shortcut for the string being "true", as in non-empty.
Definition: astring.h:97
virtual void zap(int start, int end)
Deletes the characters between "start" and "end" inclusively.
Definition: astring.cpp:521
int end() const
returns the index of the last (non-null) character in the string.
Definition: astring.h:86
int find(char to_find, int position=0, bool reverse=false) const
Locates "to_find" in "this".
Definition: astring.cpp:574
bool contains(const astring &to_find) const
Returns true if "to_find" is contained in this string or false if not.
Definition: astring.cpp:162
Provides file management services using the standard I/O support.
Definition: byte_filer.h:32
Provides operations commonly needed on file names.
Definition: filename.h:64
const basis::astring & raw() const
returns the astring that we're holding onto for the path.
Definition: filename.cpp:97
filename basename() const
returns the base of the filename; no directory.
Definition: filename.cpp:385
listo_links _links
const basis::astring & name() const
returns the name of this node.
Definition: symbol_tree.cpp:78
tree * next()
Returns a pointer to the next tree in the direction of traversal.
Definition: tree.cpp:257
A dynamically linked tree with an arbitrary number of branches.
Definition: tree.h:40
virtual tree * parent() const
Returns the tree node that is the immediate ancestor of this one.
Definition: tree.cpp:429
int elements() const
the maximum number of elements currently allowed in this amorph.
Definition: amorph.h:66
contents * borrow(int field)
Returns a pointer to the information at the index "field".
Definition: amorph.h:448
An array of strings with some additional helpful methods.
Definition: string_array.h:32
#define SETUP_COMBO_LOGGER
a macro that retasks the program-wide logger as a combo_logger.
Definition: combo_logger.h:49
#define non_continuable_error(c, f, i)
an extra piece of information used, if available, in bounds_halt below.
#define NULL_POINTER
The value representing a pointer to nothing.
Definition: definitions.h:32
#define DEFINE_CLASS_NAME(objname)
Defines the name of a class by providing a couple standard methods.
Definition: enhance_cpp.h:45
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:57
Provides macros that implement the 'main' program of an application.
#define HOOPLE_MAIN(obj_name, obj_args)
options that should work for most unix and linux apps.
Definition: hoople_main.h:61
#define BASE_LOG(s)
const int MAX_FILE_SIZE
Implements an application lock to ensure only one is running at once.
char ** _global_argv
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
const int MEGABYTE
Number of bytes in a megabyte.
Definition: definitions.h:135
bool non_negative(const type &a)
non_negative returns true if "a" is greater than or equal to zero.
Definition: functions.h:45
A platform independent way to obtain the timestamp of a file.
Definition: byte_filer.cpp:37
A logger that sends to the console screen using the standard output device.
A dynamic container class that holds any kind of object via pointers.
Definition: amorph.h:55