3 ###############################################################################
6 # Author : Chris Koeritz #
7 # Rights : Copyright (C) 2005-$now by Author #
11 # Turns a text file into a web page, where the URLs in the text file #
12 # appear as links in the web page. #
14 ###############################################################################
15 # This program is free software; you can redistribute it and/or modify it #
16 # under the terms of the GNU General Public License as published by the Free #
17 # Software Foundation; either version 2 of the License or (at your option) #
18 # any later version. See: "http://www.gruntose.com/Info/GNU/GPL.html" for a #
19 # version of the License. Please send any updates to "fred@gruntose.com". #
20 ###############################################################################
22 #require "filename_helper.pl";
23 #require "importenv.pl";
24 #require "inc_num.pl";
26 &generate_web_page(@ARGV);
30 sub generate_web_page {
31 local($text_file, $web_page) = @_;
32 if ($text_file eq "") {
33 print "The first parameter must be a text file to use as input.\n";
36 if (! -e $text_file) {
37 print "The text file that you specified does not exist.\n";
40 if ($web_page eq "") {
41 print "The second parameter must be a web page to create.\n";
45 print "The web page you specified is already present--not overwriting.\n";
49 open(INPUT_FILE, "<$text_file")
50 || die("Could not open the text file $text_file for reading.\n");
51 open(OUTPUT_FILE, ">$web_page")
52 || die("Could not open the web page $web_page for writing.\n");
54 # dump the web heading stuff out.
56 <!DOCTYPE doctype PUBLIC \"-//w3c//dtd html 4.0 transitional//en\">
59 <meta http-equiv=\"Content-Type\"
60 content=\"text/html; charset=iso-8859-1\">
61 <meta name=\"GENERATOR\"
62 content=\"Fredzilla/14 [en] (linux; U) [Muttscape]\">
63 <meta name=\"Author\" content=\"Fred T. Hamster\">
64 <title>Links scavenged from $text_file</title>
66 <body link=\"#ffff99\" vlink=\"#ffcc33\" alink=\"#ffcc66\"
67 style=\"background-color: rgb(0, 102, 0); color: rgb(204, 255, 255);\">
72 while (<INPUT_FILE>) {
74 chomp $current; # take CR off of end.
76 # take spaces off the end of the line.
77 while (substr($current, -1, 1) eq " ") { chop $current; }
78 # take spaces off the front of the line.
79 while (substr($current, 0, 1) eq " ") { $current = substr($current, 1); }
81 # this block repairs partial URLs, if there is not protocol present.
82 if ($current =~ /[^h][^t][^t][^p][^:][^\/\\][^\/\\]www\./) {
83 # repair a missing http:// in front.
84 $just_text = $current;
85 $just_text =~ s/(.*)www\.[^ ]*(.*)/\1 \2/;
86 #print "just text is $just_text\n";
87 $current =~ s/.*(www\.[^ ]*).*/http:\/\/\1/;
88 #print "curr is $current\n";
89 print OUTPUT_FILE "$just_text\n<br>\n";
90 } elsif ($current =~ /[^f][^t][^p][^:][^\/\\][^\/\\]ftp\./) {
91 # repair a missing ftp:// in front.
92 $just_text = $current;
93 $just_text =~ s/(.*)ftp\.[^ ]*(.*)/\1 \2/;
94 #print "just text is $just_text\n";
95 $current =~ s/.*(ftp\.[^ ]*).*/ftp:\/\/\1/;
96 #print "curr is $current\n";
97 print OUTPUT_FILE "$just_text\n<br>\n";
98 ### print OUTPUT_FILE "<a href=\"ftp://$current\">$current</a><br>\n";
101 # look for matches to our supported URL types.
102 if ($current =~ /http:/) {
103 # treat a web URL simply by plugging it into a link definition.
104 $just_url = $current;
105 $just_url =~ s/.*(http:[^ ]*).*/\1/;
106 #print "just url is $just_url\n";
107 $just_text = $current;
108 $just_text =~ s/(.*)http:[^ ]*(.*)/\1 \2/;
109 #print "just text is $just_text\n";
110 print OUTPUT_FILE "$just_text\n";
111 print OUTPUT_FILE "<br><a href=\"$just_url\">$just_url</a><br>\n";
112 } elsif ($current =~ /https:/) {
113 # treat a secure web URL simply by plugging it into a link definition.
114 $just_url = $current;
115 $just_url =~ s/.*(https:[^ ]*).*/\1/;
116 #print "just url is $just_url\n";
117 $just_text = $current;
118 $just_text =~ s/(.*)https:[^ ]*(.*)/\1 \2/;
119 #print "just text is $just_text\n";
120 print OUTPUT_FILE "$just_text\n";
121 print OUTPUT_FILE "<br><a href=\"$just_url\">$just_url</a><br>\n";
122 } elsif ($current =~ /ftp:/) {
123 # treat an ftp URL simply by plugging it into a link definition.
124 $just_url = $current;
125 $just_url =~ s/.*(ftp:[^ ]*).*/\1/;
126 #print "just url is $just_url\n";
127 $just_text = $current;
128 $just_text =~ s/(.*)ftp:[^ ]*(.*)/\1 \2/;
129 #print "just text is $just_text\n";
130 print OUTPUT_FILE "$just_text\n";
131 print OUTPUT_FILE "<br><a href=\"$just_url\">$just_url</a><br>\n";
132 # print OUTPUT_FILE "<a href=\"$current\">$current</a><br>\n";
134 # just print a regular line of text.
135 print OUTPUT_FILE "$current<br>\n";
139 print OUTPUT_FILE "</body>\n</html>\n";