3 ###############################################################################
5 # Name : filename_helper #
6 # Author : Chris Koeritz #
7 # Rights : Copyright (C) 1996-$now by Author #
11 # Support for manipulating filenames. #
13 ###############################################################################
14 # This program is free software; you can redistribute it and/or modify it #
15 # under the terms of the GNU General Public License as published by the Free #
16 # Software Foundation; either version 2 of the License or (at your option) #
17 # any later version. See: "http://www.gruntose.com/Info/GNU/GPL.html" for a #
18 # version of the License. Please send any updates to "fred@gruntose.com". #
19 ###############################################################################
21 use Env qw(OS IS_MSYS);
23 ############################################################################
25 #hmmm: make this lower-level, a script that is inherited by all perl scripts.
27 sub yeti_interrupt_handler {
28 die "caught an interrupt; exiting.\n";
31 # hook in a ctrl-c catcher, since that seems to be universally needed.
32 sub install_interrupt_catcher {
33 $SIG{INT} = 'yeti_interrupt_handler';
34 $SIG{QUIT} = 'yeti_interrupt_handler';
35 #print "mapped int and quit signals\n";
39 ############################################################################
41 # takes an array of filenames (each possibly containing spaces and/or
42 # wildcards) and resolves it to a useful list of actual files.
45 local(@temp_list) = @_; # the names we're given.
46 local(@to_return) = (); # the final form of the name list.
47 #print "temp list is @temp_list\n";
49 # scan through the list we're given.
50 foreach $entry (@temp_list) {
51 #print "entry is $entry\n";
52 local(@chopped_filename) = &split_filename($entry);
53 #print "chopped 0=$chopped_filename[0]\n";
54 #print "chopped 1=$chopped_filename[1]\n";
55 if ( (@chopped_filename[0] eq ".") || (@chopped_filename[0] eq "..") ) {
56 # add the simple directory name into the list.
57 push @to_return, $chopped_filename[0];
60 if (@chopped_filename[1] eq ".") {
61 # add a directory that didn't have more pattern attached.
62 push @to_return, $chopped_filename[0];
65 opendir WHERE, $chopped_filename[0]; # open the directory up.
66 local(@files_found) = readdir(WHERE);
68 foreach $possible_name (@files_found) {
69 # we need to process the pattern a bit; directory patterns are different
70 # from perl regular expression patterns, so we end up massaging any "ls"
71 # wildcards into an equivalent perl-style one below.
72 local($match) = $chopped_filename[1];
73 #hmmm: would be nice to combine the replacements into a long batch instead of separate commands, but i do not seem to know how to do that yet in perl.
74 $match =~ s/\./\\./g; # replace periods with escaped ones.
75 $match =~ s/\*/.*/g; # replace asterisks with dot star.
76 $match =~ s/\+/\\+/g; # escape plusses.
77 $match =~ s/\?/\\?/g; # escape question marks.
78 $match =~ s/\|/\\|/g; # escape pipe char.
79 $match =~ s/\$/\\\$/g; # escape dollar sign.
80 $match =~ s/\[/\\[/g; # escape open bracket.
81 $match =~ s/\]/\\]/g; # escape close bracket.
82 $match =~ s/\(/\\(/g; # escape open quote.
83 $match =~ s/\)/\\)/g; # escape close quote.
84 $match =~ s/\{/\\{/g; # escape open curly bracket.
85 $match =~ s/\}/\\}/g; # escape close curly bracket.
87 $match = "^" . $match . "\$"; # make sure the whole thing matches.
88 #print "possibname is '$possible_name':\n";
89 if ($possible_name =~ /$match/) {
90 # this one matches so add it.
91 push @to_return, $chopped_filename[0] . $possible_name;
92 #print "a match on: $chopped_filename\n";
99 ############################################################################
101 # reports if two file names are the same file.
104 local($file1, $file2) = @_;
106 ($dev1, $ino1, $junk1) = stat $file1;
107 ($dev2, $ino2, $junk2) = stat $file2;
109 return ($dev1 == $dev2) && ($ino1 == $ino2);
112 ############################################################################
114 # splits a filename into a directory and file specification.
117 local($chewed_name) = &remove_trailing_slashes(@_);
118 $chewed_name = &canonicalize($chewed_name);
119 $chewed_name = &patch_name_for_pc($chewed_name);
120 if ($chewed_name =~ /\//) {
121 # there's a slash in there.
122 local($directory_part) = $chewed_name;
123 $directory_part =~ s/^(.*\/)[^\/]*$/\1/;
124 local($file_part) = $chewed_name;
125 $file_part =~ s/^.*\/([^\/]*)$/\1/;
126 if ($file_part eq "") {
127 # if there was no file specification, just add a non-matching spec.
130 return ($directory_part, $file_part);
131 } elsif ($chewed_name eq ".") {
133 } elsif ($chewed_name eq "..") {
136 # no slash in this name, so we fix that and also make sure we match
138 return ("./", $chewed_name);
142 ############################################################################
144 # returns the base part of the filename; this omits any directories.
147 local(@parts) = &split_filename(@_);
151 # returns the directory part of the filename.
154 local(@parts) = &split_filename(@_);
158 # returns the extension found on the filename, if any.
160 local($base) = &basename(@_);
161 #printf "base is $base\n";
163 for (local($i) = length($base) - 1; $i >= 0; $i--) {
164 #printf "char is " . substr($base, $i, 1) . "\n";
165 if (substr($base, $i, 1) eq '.') {
167 #printf "got period found is $found\n";
172 return substr($base, $found, length($base) - $found);
174 return ""; # no extension seen.
177 # returns the portion of the filename without the extension.
179 local($full) = &remove_trailing_slashes(@_);
180 $full = &canonicalize($full);
181 $full = &patch_name_for_pc($full);
182 local($ext) = &extension($full);
183 local($to_remove) = length($ext);
184 return substr($full, 0, length($full) - $to_remove);
187 ############################################################################
189 # removes all directory slashes (either '/' or '\') from the end of a string.
191 sub remove_trailing_slashes {
192 local($directory_name) = @_;
193 # start looking at the end of the string.
194 local($inspection_point) = length($directory_name) - 1;
195 while ($inspection_point > 0) {
196 # examine the last character in the string to see if it's a slash.
197 local($final_char) = substr($directory_name, $inspection_point, 1);
198 # leave the loop if it's not a slash.
199 if ( ($final_char ne "/") && ($final_char ne "\\") ) { last; }
200 chop($directory_name); # remove the slash.
201 $inspection_point--; # check the new last character.
204 return $directory_name;
207 ############################################################################
209 # returns the proper directory separator for this platform. this requires
210 # an environment variable called "OS" for non-Unix operating systems. the
211 # valid values for that are shown below.
213 sub directory_separator {
214 if ( ($OS eq "Windows_NT") || ($OS eq "Windows_95")
215 || ($OS eq "DOS") || ($OS eq "OS2") ) { return "\\"; }
219 ############################################################################
221 # these mutate the directory slashes in a directory name.
223 # the one we use most frequently; it uses the unix slash.
225 return &canonicalizer(@_, "/");
228 # one that turns names into the style native on the current platform.
229 sub native_canonicalize {
230 return &canonicalizer(@_, &directory_separator());
233 # one that explicitly uses pc style back-slashes.
234 sub pc_canonicalize {
235 return &canonicalizer(@_, "\\");
238 # one that explicitly does unix style forward slashes.
239 sub unix_canonicalize {
240 return &canonicalizer(@_, "/");
243 # this more general routine gets a directory separator passed in. it then
244 # replaces all the separators with that one.
246 local($directory_name) = $_[0];
247 local($dirsep) = $_[1];
249 #print "old dir name is \"$directory_name\"\n";
252 #somewhat abbreviated check; only catches windoze systems, not dos or os2.
253 # IS_MSYS is calculated by feisty meow scripts startup; it will be
254 # non-empty if this is the msys tool kit.
255 if (length($IS_MSYS) > 0) {
256 # msys utilities version (http://www.mingw.org)
257 $directory_name =~ s/^(.):[\\\/](.*)$/\/\1\/\2/;
259 # cygwin utilities version (http://www.cygwin.com)
260 $directory_name =~ s/^(.):[\\\/](.*)$/\/cygdrive\/\1\/\2/;
262 #print "new dir name is \"$directory_name\"\n";
265 # turn all the non-default separators into the default.
266 for (local($j) = 0; $j < length($directory_name); $j++) {
267 if ( (substr($directory_name, $j, 1) eq "\\")
268 || (substr($directory_name, $j, 1) eq "/") ) {
269 substr($directory_name, $j, 1) = $dirsep;
272 # remove all occurrences of double separators except for the first
273 # double set, which could be a UNC filename.
275 for (local($i) = 1; $i < length($directory_name); $i++) {
276 # iterate through the string looking for redundant separators.
277 if (substr($directory_name, $i, 1) eq $dirsep) {
278 # we found a separator character.
280 # we had just seen a separator, so this is two in a row.
281 local($head, $tail) = (substr($directory_name, 0, $i - 1),
282 substr($directory_name, $i, length($directory_name) - 1));
283 $directory_name = $head . $tail;
284 # put the name back together without this redundant character.
285 $i--; # skip back one and try again.
287 # we have now seen a separator.
291 # this character was not a separator.
295 if ($directory_name =~ /^.:$/) {
296 # fix a dos style directory that's just X:, since we don't want the
297 # current directory to be used on that device. that's too random.
298 # instead, we assume they meant the root of the drive.
299 $directory_name = $directory_name . "/";
301 return $directory_name;
304 ############################################################################
306 # fixes a PC directory name if it is only a drive letter plus colon.
308 sub patch_name_for_pc {
310 #print "name=$name\n";
311 if (length($name) != 2) { return $name; }
312 local($colon) = substr($name, 1, 1);
313 #print "colon=$colon\n";
314 # check whether the string needs patching.
316 # name is currently in feeble form of "X:"; fix it.
319 #print "returning=$name\n";
323 ############################################################################
325 # tells whether a filename is important or not. the unimportant category
326 # can usually be safely ignored or deleted.
328 sub important_filename {
329 local($name) = &basename($_[0]);
331 # these are endings that we consider unimportant. where a caret is used
332 # at the front, we will match only the whole string. double slashes are
333 # used before periods to ensure we match a real period character.
334 local(@junk_files) = ("~", "^\\.#.*", "^\\._.*", "\\.aps", "\\.bak",
335 "^binaries", "^bin.ant", "^bin.eclipse",
336 "\\.clw", "^cpdiff_tmp\\.txt", "^\\.ds_store", "^diffs\\.txt",
337 "^diff_tmp\\.txt", "\\.dsp", "\\.dsw", "\\.gid", "gmon\\.out", "\\.isr",
338 "^isconfig\\.ini", "\\.log", "^manifest.txt", "^obj",
339 "\\.obj", "\\.output", "\\.plg", "^RCa.*", "^Release", "\\.res",
340 "\\.sbr", ".*scc", "^Setup\\.dbg", "^Setup\\.inx",
341 "^Setup\\.map", "^Setup\\.obs", "^Selenium_.*Login.html",
342 "\\.stackdump", "^string1033\\.txt", "\\.suo", "\\.swp",
343 "^thumbs.db", "[a-zA-Z0-9]\\.tmp", "^trans\\.tbl", "\\.user", "_version\\.h",
344 "_version\\.rc", "^waste", "\\.ws4", "\\.wsm");
346 foreach $temp (@junk_files) {
348 if ($name =~ /${temp}/i) { return 0; }
349 # we hit a match on it being unimportant.
352 return 1; # anything else is considered important.
355 ############################################################################
358 return &patch_name_for_pc
359 (&remove_trailing_slashes
360 (&canonicalize(@_)));
363 ############################################################################
365 sub get_drive_letter {
367 if (substr($path, 0, 1) =~ /[a-zA-Z]/) {
368 if (substr($path, 1, 1) eq ":") { return substr($path, 0, 1); }
373 ############################################################################
375 sub remove_drive_letter {
377 if (substr($path, 0, 1) =~ /[a-zA-Z]/) {
378 if (substr($path, 1, 1) eq ":") { return substr($path, 2); }
383 ############################################################################
385 # these return their argument with the case flipped to lower or upper case.
389 $name =~ tr/A-Z/a-z/;
395 $name =~ tr/a-z/A-Z/;
399 ############################################################################
401 # recursively deletes a directory that is passed as the single parameter.
402 # from http://developer.novell.com/wiki/index.php/Recursive_Directory_Remove
403 sub recursive_delete {
407 print "this is not a dir: $dir\nshould whack it here?\n";
412 # if we can't open the dir, just skip to the next one.
413 opendir DIR, $dir or next;
414 while ($_ = readdir DIR) {
416 my $path = "$dir/$_";
417 unlink $path if -f $path;
418 recursive_delete($path) if -d $path;
421 rmdir $dir or print "error - $!";
425 ############################################################################
427 # finds any directories under the arguments, which can be a list of directories.
428 sub find_directories {
433 # if we can't open the dir, just skip to the next one.
434 opendir DIR, $dir or next;
435 while ($_ = readdir DIR) {
436 # skip if it's current or parent dir.
438 my $path = "$dir/$_";
439 # skip if this entry is not itself a directory.
441 push @dirs_found, $path;
448 ############################################################################
450 # given a list of paths, this returns an array of all the filenames found therein.
452 my @files_found = ();
456 # that's actually just a file, so add it.
457 push @files_found, $dir;
461 # if we can't open the dir, just skip to the next one.
462 opendir DIR, $dir or next;
463 while ($_ = readdir DIR) {
464 # skip if it's current or parent dir.
466 my $path = "$dir/$_";
467 # skip if this entry is not a file.
469 push @files_found, $path;
476 ############################################################################
478 # finds all directories starting at a particular directory and returns them
479 # in an array. does not include the starting directory.
480 sub recursive_find_directories {
481 # first find all the directories within the parameters.
482 my @toplevel = find_directories(@_);
485 push(@to_return, @toplevel);
487 # return the composition of the list we found here plus any directories under those.
488 # we only recurse if there's something to chew on in our directory list.
489 # otherwise, we've hit the bottom of that tree.
490 if (scalar @toplevel > 0) {
491 my @subs_found = recursive_find_directories(@toplevel);
492 push(@to_return, @subs_found);
497 ############################################################################