1 #ifndef BOOKMARK_TREE_CLASS
2 #define BOOKMARK_TREE_CLASS
4 /*****************************************************************************\
6 * Name : bookmark_tree *
7 * Author : Chris Koeritz *
11 * Parses a link database in HOOPLE format into tree structure. *
13 *******************************************************************************
14 * Copyright (c) 2005-$now By Author. This program is free software; you can *
15 * redistribute it and/or modify it under the terms of the GNU General Public *
16 * License as published by the Free Software Foundation; either version 2 of *
17 * the License or (at your option) any later version. This is online at: *
18 * http://www.fsf.org/copyleft/gpl.html *
19 * Please send any updates to: fred@gruntose.com *
20 \*****************************************************************************/
22 #include <basis/astring.h>
23 #include <nodes/symbol_tree.h>
24 #include <structures/amorph.h>
25 #include <structures/string_array.h>
26 #include <structures/string_table.h>
29 class inner_mark_tree;
34 ////////////////////////////////////////////////////////////////////////////
40 virtual ~bookmark_tree();
// virtual destructor.  only declared in this header; whatever cleanup it
// performs lives in the implementation.
41 DEFINE_CLASS_NAME("bookmark_tree");
// NOTE(review): DEFINE_CLASS_NAME is a macro defined outside this file;
// presumably it records the class's name string -- confirm against its definition.
43 int read_csv_file(const basis::astring &input_filename);
44 // reads the file contents of "input_filename" into this tree.
// NOTE(review): the meaning of the returned int (status code versus a count)
// is not visible in this header; confirm against the implementation.
46 static void break_name(const basis::astring &to_break, basis::astring &name,
47 basis::astring &nick);
48 // breaks the category name in "to_break" into its two components, if they exist.
// the pieces are handed back through the "name" and "nick" reference parameters.
// NOTE(review): the separator syntax, and what "nick" holds when there is no
// nickname, are not visible here; confirm against the implementation.
50 static bool magic_category_comparison(const basis::astring &a, const basis::astring &b);
51 // compares the two category strings "a" and "b" and returns true if either the
52 // main name or the nickname matches either.
// NOTE(review): case sensitivity of the match is not visible in this header.
54 static basis::astring prune_link_down(const basis::astring &to_prune);
55 // reduces the URL in "to_prune" to its bare bones and returns the reduced form.
// it will strip out the "http://" and "www." and such.
57 static bool excellent_link_comparator(const basis::astring &a, const basis::astring &b);
58 // a string comparator that handles how links are often formed.  it uses the link pruner
59 // to decide whether the links "a" and "b" are equal at their root.
61 inner_mark_tree *process_category(const structures::string_array &items);
62 // handles category declarations and adds the new category to our list.
63 // this tries to do the intelligent thing if the category is already
64 // found to exist, meaning that the file has a duplicate category.
// NOTE(review): the layout expected in "items" and what the returned pointer
// refers to (presumably the node for the category) are not visible in this
// header; confirm against the implementation.
67 void process_link(const structures::string_array &items);
// handles a link declaration whose fields arrive in the string list "items".
// NOTE(review): presumably adds the link to the tree and updates the link
// count -- confirm against the implementation.
69 void process_comment(const basis::astring &current_line_in);
// handles a comment line; "current_line_in" is passed by const reference.
// NOTE(review): presumably "current_line_in" is the raw text of the line being
// processed; what is done with the comment is not visible in this header.
71 inner_mark_tree *find_parent(const basis::astring &parent_name);
72 // locates the parent called "parent_name" given the context that
73 // we've saved about the last parent.
// NOTE(review): the result when no such parent exists (presumably a null
// pointer) is not visible here; confirm before dereferencing.
75 static bool advance(int &index, const basis::astring &check, const basis::astring &finding);
76 //!< moves the "index" forward if the "finding" string is the head of "check".
// "index" is an in/out parameter taken by reference.
// NOTE(review): presumably returns true when the index was moved -- confirm.
78 static int find_prune_point(const basis::astring &to_prune);
79 //!< attempts to locate the real start of the root URL in "to_prune".
// NOTE(review): presumably the result is a character index into "to_prune";
// the value returned when nothing is found is not visible in this header.
81 // these provide access to the information held about the tree...
83 inner_mark_tree &access_root(); // allows access to the root of the tree.
// the root is handed back as a non-const reference, so callers can modify it.
// reports the current value of the _link_count member.
85 int link_count() const { return this->_link_count; }
// reports the current value of the _category_count member.
87 int category_count() const { return this->_category_count; }
89 // public data members...  currently this is used outside the class.
90 int _line_number; // the current line in the database.
// NOTE(review): since this member is public, code outside the class may write
// to it as well as read it.
// state held by the tree object.
// NOTE(review): ownership of the raw pointers below is not visible in this
// header; confirm which of them the destructor is responsible for releasing.
93 inner_mark_tree *_mark_tree; // our tree of categories.
94 int _link_count; // number of links.
95 int _category_count; // number of categories.
96 inner_mark_tree *_last_parent; // the last parent we saw.
97 inner_mark_tree *_last_node; // the last node we touched.
98 symbol_int *_links_seen; // URLs we've seen.
99 structures::string_table *_category_names; // used to enforce uniqueness of categories.
102 ////////////////////////////////////////////////////////////////////////////
107 basis::astring _description; // the description text held for this link.
// builds a record by copying each argument into the matching member:
// "description" into _description, "url" into _url and "uid" into _uid.
111 link_record(const basis::astring &description, const basis::astring &url, int uid)
112 : _description(description), _url(url), _uid(uid) {}
115 ////////////////////////////////////////////////////////////////////////////
// a list of link_record objects, derived from structures::amorph<link_record>.
117 class listo_links : public structures::amorph<link_record>
122 void add(link_record *new_rec, bool sort = true);
// adds "new_rec" to the list; "sort" defaults to true.
// NOTE(review): presumably sort=true places the record in sorted position and
// sort=false places it among the unsorted items tracked by _next_index; whether
// the list takes ownership of "new_rec" is not visible here -- confirm both.
125 int _next_index; // tracks where we've added unsorted items.
128 ////////////////////////////////////////////////////////////////////////////
// a node in the category tree: a nodes::symbol_tree that additionally carries
// a list of links and a numeric identifier.
130 class inner_mark_tree : public nodes::symbol_tree
133 listo_links _links; // the list held at this node.
134 int _uid; // the unique identifier of this node.
// constructs a node: "node_name" and "max_bits" are forwarded to the
// nodes::symbol_tree base class ("max_bits" defaults to 2) and "uid" is stored
// in _uid.  _links is not named in the initializer list, so it is default-constructed.
136 inner_mark_tree(const basis::astring &node_name, int uid, int max_bits = 2)
137 : nodes::symbol_tree(node_name, max_bits), _uid(uid) {}
141 ////////////////////////////////////////////////////////////////////////////