1 /*****************************************************************************\
3 * Name : bookmark_tree *
4 * Author : Chris Koeritz *
6 *******************************************************************************
7 * Copyright (c) 2005-$now By Author. This program is free software; you can *
8 * redistribute it and/or modify it under the terms of the GNU General Public *
9 * License as published by the Free Software Foundation; either version 2 of *
10 * the License or (at your option) any later version. This is online at: *
11 * http://www.fsf.org/copyleft/gpl.html *
12 * Please send any updates to: fred@gruntose.com *
13 \*****************************************************************************/
15 #include "bookmark_tree.h"
17 #include <basis/astring.h>
18 #include <basis/functions.h>
19 #include <basis/guards.h>
20 #include <filesystem/byte_filer.h>
21 #include <filesystem/filename.h>
22 #include <loggers/critical_events.h>
23 #include <loggers/file_logger.h>
24 #include <loggers/program_wide_logger.h>
25 #include <nodes/symbol_tree.h>
26 #include <structures/amorph.h>
27 #include <structures/string_table.cpp>
28 #include <structures/symbol_table.h>
29 #include <textual/list_parsing.h>
30 #include <textual/parser_bits.h>
32 ///#include <stdio.h>//temp
34 using namespace basis;
35 using namespace filesystem;
36 using namespace loggers;
37 using namespace nodes;
38 using namespace structures;
39 using namespace textual;
42 // uncomment to have more debugging noise, but a reasonable amount.
43 //#define DEBUG_MARKS_TREE
44 // uncomment to get crazy noisy debug noise about tree traversal.
47 #define BASE_LOG(s) program_wide_logger::get().log(s)
48 #define SHOW_LINE a_sprintf("line %d: ", _line_number)
50 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger::get(), SHOW_LINE + s)
51 #define DEADLY_LINE (astring(func) + a_sprintf(", line %d: ", _line_number))
53 const int ESTIMATED_ELEMENTS = 100;
54 // we're planning for about this many links to be efficiently handled.
56 const int MAX_LINE_SIZE = 256 * KILOBYTE;
57 // the largest line we'll process in the links database.
59 // used to compare two strings while ignoring case; we use this to find
60 // our categories in the symbol table.
61 bool case_insense_compare(const astring &a, const astring &b)
62 { return a.iequals(b); }
64 ////////////////////////////////////////////////////////////////////////////
66 listo_links::listo_links() : amorph<link_record>(), _next_index(0) {}
68 void listo_links::add(link_record *new_rec, bool sort)
70 // we don't sort blank lines--they just get dropped in the end of
72 if (sort && new_rec->_description.t()) {
73 for (int i = _next_index; i < elements(); i++) {
74 const astring &desc_cur = borrow(i)->_description;
75 //this check doesn't make much sense; it only checks if the description is equal?
76 // if it were really sorting, wouldn't it need to check if the check is greater than current?
77 if (desc_cur.iequals(new_rec->_description)
78 // || shouldn't there be a case for this being greater than the current???
88 _next_index = elements();
91 ////////////////////////////////////////////////////////////////////////////
93 class symbol_int : public symbol_table<int>
96 symbol_int() : symbol_table<int>(10) {}
99 ////////////////////////////////////////////////////////////////////////////
101 bookmark_tree::bookmark_tree()
103 _mark_tree(new inner_mark_tree("Root", 0, ESTIMATED_ELEMENTS)),
106 _last_parent(_mark_tree),
107 _last_node(_mark_tree),
108 _links_seen(new symbol_int),
109 _category_names(new string_table)
112 bookmark_tree::~bookmark_tree()
116 WHACK(_category_names);
119 void bookmark_tree::break_name(const astring &to_break, astring &name,
122 nick = astring::empty_string();
124 int indy = name.find('[');
125 if (negative(indy)) return;
126 nick = name.substring(indy + 1, name.end());
127 while ( (nick[nick.end()] == ' ') || (nick[nick.end()] == ']') )
128 nick.zap(nick.end(), nick.end());
129 name.zap(indy, name.end());
134 inner_mark_tree &bookmark_tree::access_root() { return *_mark_tree; }
136 bool bookmark_tree::magic_category_comparison(const astring &a, const astring &b)
138 FUNCDEF("magic_category_comparison");
139 //LOG(astring("compare: a=") + a + " b=" + b);
140 if (a.iequals(b)) return true;
141 astring a_name, a_nick;
142 break_name(a, a_name, a_nick);
143 astring b_name, b_nick;
144 break_name(b, b_name, b_nick);
145 if (a_name.iequals(b_name)) return true;
146 if (a_nick.t() && a_nick.iequals(b_name)) return true;
147 if (b_nick.t() && a_name.iequals(b_nick)) return true;
148 if (a_nick.t() && b_nick.t() && a_nick.iequals(b_nick)) return true;
152 const astring &HTTP_HEAD = "http://";
153 const astring &FTP_HEAD = "ftp://";
154 const astring &WWW_SITE = "www.";
155 const astring &FTP_SITE = "ftp.";
157 bool bookmark_tree::advance(int &index, const astring &check, const astring &finding)
159 if (check.compare(finding, index, 0, finding.length(), false)) {
160 index += finding.length();
166 int bookmark_tree::find_prune_point(const astring &to_prune)
169 advance(to_return, to_prune, HTTP_HEAD);
170 advance(to_return, to_prune, FTP_HEAD);
171 advance(to_return, to_prune, WWW_SITE);
172 advance(to_return, to_prune, FTP_SITE);
176 astring bookmark_tree::prune_link_down(const astring &to_prune)
178 //printf("%s\n", (astring("pruned=") + to_prune.substring(find_prune_point(to_prune), to_prune.end())).s());
179 return to_prune.substring(find_prune_point(to_prune), to_prune.end()); }
181 bool bookmark_tree::excellent_link_comparator(const astring &a, const astring &b)
183 int prune_a = find_prune_point(a);
184 int prune_b = find_prune_point(b);
185 int bigger_len = maximum(a.length() - prune_a, b.length() - prune_b);
186 bool to_return = a.compare(b, prune_a, prune_b, bigger_len, false);
187 //if (to_return) printf("%s and %s are equal.", a.s(), b.s());
191 inner_mark_tree *bookmark_tree::find_parent(const astring &parent_name)
193 FUNCDEF("find_parent");
194 // first, look for the node above the last parent.
195 inner_mark_tree *parent = dynamic_cast<inner_mark_tree *>
196 (_last_parent->find(parent_name, inner_mark_tree::recurse_upward,
197 magic_category_comparison));
199 #ifdef DEBUG_MARKS_TREE
201 LOG(astring("trying upwards find for parent node ") + parent_name);
205 #ifdef DEBUG_MARKS_TREE
206 LOG(astring("upwards find failed seeking on last_parent node ")
210 // we didn't find the parent above the last category...
211 parent = dynamic_cast<inner_mark_tree *>(_last_node->find(parent_name,
212 inner_mark_tree::recurse_upward, magic_category_comparison));
216 #ifdef DEBUG_MARKS_TREE
217 LOG(astring("upwards find failed seeking on last_node ") + parent_name);
220 // last node didn't help either.
221 parent = dynamic_cast<inner_mark_tree *>(_mark_tree->find(parent_name,
222 inner_mark_tree::recurse_downward, magic_category_comparison));
225 // failed to find the parent node, so hook it to the root node.
226 LOG(astring("failed to find parent node ") + parent_name);
228 // create a parent node and use it for this guy.
229 inner_mark_tree *new_node = new inner_mark_tree(parent_name,
230 _line_number, ESTIMATED_ELEMENTS);
231 _mark_tree->attach(new_node);
237 #ifdef DEBUG_MARKS_TREE
238 LOG(astring("found parent node ") + parent_name);
245 inner_mark_tree *bookmark_tree::process_category(const string_array &items)
247 FUNCDEF("process_category");
248 const astring &category_name = items[1];
249 const astring &parent_name = items[2];
251 if (items.length() > 3) {
252 // complain about a possibly malformed category.
253 LOG(astring("category ") + category_name + " under " + parent_name
254 + " has extra fields!");
257 //BASE_LOG("CURRENT:");
258 //BASE_LOG(_mark_tree->text_form());
260 // make sure we don't add anything to the tree if this is the root.
261 if (!parent_name || magic_category_comparison("Root", category_name)) {
262 #ifdef DEBUG_MARKS_TREE
263 LOG(astring("skipping parent node for ") + category_name);
268 // ensure that the categories aren't competing with other names.
269 astring main_name, nickname;
270 break_name(category_name, main_name, nickname);
271 astring *found1 = _category_names->find(main_name, case_insense_compare);
272 astring *found2 = _category_names->find(nickname, case_insense_compare);
273 if (found1 || found2) {
275 if (found1) catnames = *found1; // add the first match, if it exists.
277 if (!!catnames) catnames += " and ";
280 LOG(astring("category name \"") + category_name
281 + "\" in conflict with existing: " + catnames);
282 inner_mark_tree *fake_it = NULL_POINTER;
284 //hmmm: neither of these are right; they need to use a comparator that
285 // uses our magic comparison function.
289 LOG(astring("found existing category for main name: ") + main_name);
291 // fake_it = (inner_mark_tree *)_mark_tree->find(*found1,
292 // symbol_tree::recurse_downward);
293 fake_it = dynamic_cast<inner_mark_tree *>(_mark_tree->find
294 (*found1, inner_mark_tree::recurse_downward,
295 magic_category_comparison));
299 LOG(astring("returning existing category for main name: ") + main_name
300 + " as: " + fake_it->name());
306 LOG(astring("found existing category for nickname: ") + nickname);
308 /// fake_it = (inner_mark_tree *)_mark_tree->find(*found2,
309 /// symbol_tree::recurse_downward);
310 fake_it = dynamic_cast<inner_mark_tree *>(_mark_tree->find
311 (*found2, inner_mark_tree::recurse_downward,
312 magic_category_comparison));
316 LOG(astring("returning existing category for nickname: ") + nickname
317 + " as: " + fake_it->name());
321 LOG("==> failure to find a match for either category!");
322 deadly_error(class_name(), func, "collision resolution code failed; "
323 "please fix category error");
326 // now that we know these names are unique, we'll add them into the list
327 // so future categories can't reuse these.
328 _category_names->add(main_name, main_name);
329 if (!!nickname) _category_names->add(nickname, nickname);
331 inner_mark_tree *parent = find_parent(parent_name);
332 _last_parent = parent; // set the parent for the next time.
334 // see if the category is already present under the parent.
335 for (int i = 0; i < parent->branches(); i++) {
336 inner_mark_tree *curr = dynamic_cast<inner_mark_tree *>(parent->branch(i));
338 non_continuable_error(class_name(), DEADLY_LINE, "missing branch in tree");
339 #ifdef DEBUG_MARKS_TREE
340 LOG(astring("looking at branch ") + curr->name());
342 if (magic_category_comparison(curr->name(), category_name)) {
343 // it already exists? argh.
344 LOG(astring("category ") + category_name + " already exists under "
345 + parent_name + ".");
351 inner_mark_tree *new_node = new inner_mark_tree(category_name,
352 _line_number, ESTIMATED_ELEMENTS);
353 parent->attach(new_node);
355 _last_node = new_node;
359 #ifdef DEBUG_MARKS_TREE
360 LOG(astring("attaching node ") + category_name + " to parent "
366 void bookmark_tree::process_link(const string_array &items)
368 FUNCDEF("process_link");
369 astring description = items[1];
370 astring parent_name = items[2];
371 astring url = "UNKNOWN";
372 if (items.length() >= 4) url = items[3];
374 // strip any directory slashes that are provided as a suffix. we don't need
375 // them and they tend to confuse the issue when we look for duplicates.
376 while (url[url.end()] == '/') {
377 url.zap(url.end(), url.end());
380 // make some noise if they seem to have a badly formed link.
381 if (items.length() < 4) {
382 LOG(astring("link ") + description + " under " + parent_name
384 } else if (items.length() > 4) {
385 LOG(astring("link ") + description + " under " + parent_name
386 + " has extra fields!");
389 // find the parent for this link.
390 inner_mark_tree *parent = find_parent(parent_name);
391 _last_parent = parent; // set the parent for the next time.
393 // see if the link already exists.
394 int *found = _links_seen->find(url, excellent_link_comparator);
396 // this is not so great; a duplicate link has been found.
397 LOG(a_sprintf("Duplicate Link: line %d already has ", *found) + url);
400 _links_seen->add(prune_link_down(url), _line_number);
403 // add the link to the parent.
404 link_record *new_rec = new link_record(description, url, _line_number);
405 parent->_links.add(new_rec);
410 void bookmark_tree::process_comment(const astring ¤t_line_in)
412 FUNCDEF("process_comment");
413 astring current_line = current_line_in;
415 // output the comment as simple text.
416 //BASE_LOG("comment case");
417 if (current_line.contains("http:")) {
418 astring hold_curr = current_line;
419 int indy = current_line.find("http:");
420 hold_curr.zap(0, indy - 1);
421 current_line = astring(" "
422 "<a href=\"") + hold_curr + "\">" + hold_curr + "</a>";
423 } else if (current_line.t()) {
424 // snap the comment character off of the front.
425 current_line.zap(0, 0);
428 link_record *new_rec = new link_record(current_line,
429 astring::empty_string(), _line_number);
430 _last_node->_links.add(new_rec, false);
433 int bookmark_tree::read_csv_file(const astring &input_filename)
435 FUNCDEF("read_csv_file");
436 byte_filer input_file(input_filename, "r");
437 if (!input_file.good())
438 non_continuable_error(class_name(), DEADLY_LINE,
439 "the input file could not be opened");
441 string_array items; // parsed in csv line.
442 astring current_line; // read from input file.
444 // read the lines in the file, one at a time.
445 while (input_file.getline(current_line, MAX_LINE_SIZE) > 0) {
446 _line_number++; // go to the next line.
447 // remove the carriage returns first.
448 while (parser_bits::is_eol(current_line[current_line.end()])) {
449 current_line.zap(current_line.end(), current_line.end());
451 current_line.strip_spaces();
452 if (!current_line.length()) {
453 // // blank lines get treated as a special case. they are always added
454 // // at the end of the list.
455 // process_comment(current_line);
457 } else if (current_line[0] == '#') {
458 // handle a comment in the database.
459 process_comment(current_line);
461 // csv parse the line, since we don't support much else.
462 bool parsed = list_parsing::parse_csv_line(current_line, items);
464 non_continuable_error(class_name(), DEADLY_LINE,
465 astring("the line could not be parsed: ") + current_line);
466 if (!items.length()) {
467 LOG("bad formatting on this line.");
470 if (items[0].iequals("C")) {
471 inner_mark_tree *node = process_category(items);
473 LOG(astring("failed to get a node for ") + items[1]);
475 } else if (items[0].iequals("L")) {
478 non_continuable_error(class_name(), DEADLY_LINE,
479 astring("unknown format in line: ") + current_line);