X-Git-Url: https://feistymeow.org/gitweb/?a=blobdiff_plain;f=core%2Fapplications%2Fbookmark_tools%2Flink_parser.cpp;h=84467b5eafcdb257b99d4a44a645a07c4c3d0e75;hb=12271e032cc820565839dc4029666edac84de8ba;hp=1dd0f9edb468215b4b4fce0fcc1bf7ebc3637c21;hpb=2952ccf47b80174880141a7ecfa122089f349b8d;p=feisty_meow.git diff --git a/core/applications/bookmark_tools/link_parser.cpp b/core/applications/bookmark_tools/link_parser.cpp index 1dd0f9ed..84467b5e 100644 --- a/core/applications/bookmark_tools/link_parser.cpp +++ b/core/applications/bookmark_tools/link_parser.cpp @@ -66,8 +66,13 @@ const int MAX_FILE_SIZE = 4 * MEGABYTE; #define INCREM_N_GO { curr_index++; continue; } // puts the current character on the intermediate string. -#define ADD_INTERMEDIATE \ - intermediate_text += full_contents[curr_index] +#define ADD_INTERMEDIATE { \ + char add_in = full_contents[curr_index]; \ + if ( (add_in == '<') || (add_in == '>') ) { \ + add_in = '-'; \ + } \ + intermediate_text += add_in; \ +} // returns a character in lower-case, if 'a' is in upper case. char normalize_char(char a) @@ -138,14 +143,6 @@ void strain_out_html_codes(astring &to_edit) /* clean naughty characters out of the names. */ \ CLEAN_UP_NAUGHTY(url_string); \ CLEAN_UP_NAUGHTY(name_string); \ - if (url_string.ends(name_string)) { \ - /* handle the name being boring. replace with the intermediate text. */ \ - MAKE_MORE_ENGLISH(intermediate_text); \ - strain_out_html_codes(intermediate_text); \ - CLEAN_UP_NAUGHTY(intermediate_text); \ - if (intermediate_text.length()) \ - name_string = intermediate_text; \ - } \ /* output a link in the HOOPLE format. */ \ astring to_write = "\"L\",\""; \ to_write += translate_web_chars(name_string); \ @@ -157,6 +154,16 @@ void strain_out_html_codes(astring &to_edit) output_file.write(to_write); \ _link_count++; \ } +//was after second clean up naughty +/*argh yuck... if (url_string.ends(name_string)) { \ + / * handle the name being boring. replace with the intermediate text. * / \ + MAKE_MORE_ENGLISH(intermediate_text); \ + strain_out_html_codes(intermediate_text); \ + CLEAN_UP_NAUGHTY(intermediate_text); \ + if (intermediate_text.length()) \ + name_string = intermediate_text; \ + } \ +*/ // writes out the current section in the HOOPLE format. // currently the parent category is set to Root. @@ -164,7 +171,7 @@ void strain_out_html_codes(astring &to_edit) CLEAN_UP_NAUGHTY(last_heading); /* clean the name. */ \ /* output a category definition. */ \ astring to_write = "\"C\",\""; \ - to_write += last_heading; \ + to_write += translate_web_chars(last_heading); \ to_write += "\",\""; \ to_write += abbreviate_category(last_parents.top()); \ to_write += "\"\n"; \ @@ -253,7 +260,22 @@ astring link_parser::translate_web_chars(const astring &vervoom) { astring to_return = vervoom; to_return.replace_all("&", "&"); + to_return.replace_all("ä", "ä"); + to_return.replace_all("©", "(c)"); + to_return.replace_all("é", "é"); + to_return.replace_all("«", "--"); + to_return.replace_all("‘", "'"); + to_return.replace_all("“", "'"); + to_return.replace_all("—", "--"); + to_return.replace_all("–", "--"); + to_return.replace_all(" ", " "); + to_return.replace_all("»", "--"); + to_return.replace_all("”", "'"); + to_return.replace_all("’", "'"); + to_return.replace_all("%7E", "~"); + to_return.replace_all("%28", "("); + to_return.replace_all("%29", ")"); return to_return; } @@ -371,8 +393,8 @@ int link_parser::execute() #ifdef DEBUG_LINK_PARSER LOG("into the not an '', true); +// intermediate_text += '<'; + JUMP_TO_CHAR('>', false); continue; } #ifdef DEBUG_LINK_PARSER @@ -381,8 +403,8 @@ int link_parser::execute() // found an a, but make sure that's the only character in the word. curr_index++; if (!parser_bits::white_space(full_contents[curr_index])) { - intermediate_text += "', true); +// intermediate_text += "', false); continue; } // this looks like an address so find the start of the href.