3 ###############################################################################
6 # Author : Chris Koeritz #
7 # Rights : Copyright (C) 2005-$now by Author #
11 # Turns a text file into a web page, where the URLs in the text file #
12 # appear as links in the web page. #
14 ###############################################################################
15 # This program is free software; you can redistribute it and/or modify it #
16 # under the terms of the GNU General Public License as published by the Free #
17 # Software Foundation; either version 2 of the License or (at your option) #
18 # any later version. See: "http://www.gruntose.com/Info/GNU/GPL.html" for a #
19 # version of the License. Please send any updates to "fred@gruntose.com". #
20 ###############################################################################
22 #require "filename_helper.pl";
23 #require "inc_num.pl";
25 &generate_web_page(@ARGV);
29 sub generate_web_page {
30 local($text_file, $web_page) = @_;
31 if ($text_file eq "") {
32 print "The first parameter must be a text file to use as input.\n";
35 if (! -e $text_file) {
36 print "The text file that you specified does not exist.\n";
39 if ($web_page eq "") {
40 print "The second parameter must be a web page to create.\n";
44 print "The web page you specified is already present--not overwriting.\n";
48 open(INPUT_FILE, "<$text_file")
49 || die("Could not open the text file $text_file for reading.\n");
50 open(OUTPUT_FILE, ">$web_page")
51 || die("Could not open the web page $web_page for writing.\n");
53 # dump the web heading stuff out.
55 <!DOCTYPE doctype PUBLIC \"-//w3c//dtd html 4.0 transitional//en\">
58 <meta http-equiv=\"Content-Type\"
59 content=\"text/html; charset=iso-8859-1\">
60 <meta name=\"GENERATOR\"
61 content=\"Fredzilla/14 [en] (linux; U) [Muttscape]\">
62 <meta name=\"Author\" content=\"Fred T. Hamster\">
63 <title>Links scavenged from $text_file</title>
65 <body link=\"#ffff99\" vlink=\"#ffcc33\" alink=\"#ffcc66\"
66 style=\"background-color: rgb(0, 102, 0); color: rgb(204, 255, 255);\">
71 while (<INPUT_FILE>) {
73 chomp $current; # take CR off of end.
75 # take spaces off the end of the line.
76 while (substr($current, -1, 1) eq " ") { chop $current; }
77 # take spaces off the front of the line.
78 while (substr($current, 0, 1) eq " ") { $current = substr($current, 1); }
80 # this block repairs partial URLs, if there is not protocol present.
81 if ($current =~ /[^h][^t][^t][^p][^:][^\/\\][^\/\\]www\./) {
82 # repair a missing http:// in front.
83 $just_text = $current;
84 $just_text =~ s/(.*)www\.[^ ]*(.*)/\1 \2/;
85 #print "just text is $just_text\n";
86 $current =~ s/.*(www\.[^ ]*).*/http:\/\/\1/;
87 #print "curr is $current\n";
88 print OUTPUT_FILE "$just_text\n<br>\n";
89 } elsif ($current =~ /[^f][^t][^p][^:][^\/\\][^\/\\]ftp\./) {
90 # repair a missing ftp:// in front.
91 $just_text = $current;
92 $just_text =~ s/(.*)ftp\.[^ ]*(.*)/\1 \2/;
93 #print "just text is $just_text\n";
94 $current =~ s/.*(ftp\.[^ ]*).*/ftp:\/\/\1/;
95 #print "curr is $current\n";
96 print OUTPUT_FILE "$just_text\n<br>\n";
97 ### print OUTPUT_FILE "<a href=\"ftp://$current\">$current</a><br>\n";
100 # look for matches to our supported URL types.
101 if ($current =~ /http:/) {
102 # treat a web URL simply by plugging it into a link definition.
103 $just_url = $current;
104 $just_url =~ s/.*(http:[^ ]*).*/\1/;
105 #print "just url is $just_url\n";
106 $just_text = $current;
107 $just_text =~ s/(.*)http:[^ ]*(.*)/\1 \2/;
108 #print "just text is $just_text\n";
109 print OUTPUT_FILE "$just_text\n";
110 print OUTPUT_FILE "<br><a href=\"$just_url\">$just_url</a><br>\n";
111 } elsif ($current =~ /https:/) {
112 # treat a secure web URL simply by plugging it into a link definition.
113 $just_url = $current;
114 $just_url =~ s/.*(https:[^ ]*).*/\1/;
115 #print "just url is $just_url\n";
116 $just_text = $current;
117 $just_text =~ s/(.*)https:[^ ]*(.*)/\1 \2/;
118 #print "just text is $just_text\n";
119 print OUTPUT_FILE "$just_text\n";
120 print OUTPUT_FILE "<br><a href=\"$just_url\">$just_url</a><br>\n";
121 } elsif ($current =~ /ftp:/) {
122 # treat an ftp URL simply by plugging it into a link definition.
123 $just_url = $current;
124 $just_url =~ s/.*(ftp:[^ ]*).*/\1/;
125 #print "just url is $just_url\n";
126 $just_text = $current;
127 $just_text =~ s/(.*)ftp:[^ ]*(.*)/\1 \2/;
128 #print "just text is $just_text\n";
129 print OUTPUT_FILE "$just_text\n";
130 print OUTPUT_FILE "<br><a href=\"$just_url\">$just_url</a><br>\n";
131 # print OUTPUT_FILE "<a href=\"$current\">$current</a><br>\n";
133 # just print a regular line of text.
134 print OUTPUT_FILE "$current<br>\n";
138 print OUTPUT_FILE "</body>\n</html>\n";