feisty meow concerns codebase 2.140
filename_helper.py
Go to the documentation of this file.
1#! /usr/bin/env python3
2
3"""
4
5Name : filename helper
6Author : Chris Koeritz
7Rights : Copyright (C) 1996-$now by Author
8
9Purpose:
10
11 Functions that manipulate filenames in various helpful ways.
12
13License:
14This program is free software; you can redistribute it and/or modify it under
15the terms of the GNU General Public License as published by the Free
16Software Foundation; either version 2 of the License or (at your option)
17any later version. See: "http://www.gruntose.com/Info/GNU/GPL.html" for a
18version of the License. Please send any updates to "fred@gruntose.com".
19
20"""
21
import os
import path
import re
24
25
26
27#hmmm: lots of interesting perl interrupt handling stuff. do we need any of that? betting not.
28
41
42
43
# takes an array of filenames (each possibly containing spaces and/or
# wildcards) and resolves it to a useful list of actual files.
def glob_list(original_names: list) -> list:
    """
    takes a set of filenames that may be relative (or really arcane) and globs them into a normal list of filenames.

    each entry is broken up with split_filename().  entries that are just
    "." or "..", or that have no file specification (a file part of "."),
    contribute their directory part unchanged.  every other entry has its
    directory listed, and each name in the listing that matches the file
    part is added to the result with the directory part in front of it.

    only the asterisk is a wildcard (it matches any run of characters);
    every other character in the file part, including '?', '[' and '+',
    must match literally.  a directory that cannot be listed contributes no
    names rather than raising an error.
    """
    to_return = []  # the final form of the name list.

    # scan through the list we're given.
    for entry in original_names:
        directory_part, file_part = split_filename(entry)

        if directory_part in (".", "..") or file_part == ".":
            # a simple directory name, or a directory that didn't have any
            # pattern attached; it goes into the list as is.
            to_return.append(directory_part)
            continue

        # get all the contents from the directory (both subdirectories and files).
        try:
            files_found = os.listdir(directory_part)
        except OSError:
            # can't read that directory, so nothing in it can match.
            continue

        # build the matcher once per entry: the pieces between asterisks are
        # escaped so they match literally, and each asterisk becomes "dot star".
        matcher = re.compile(
            ".*".join(re.escape(piece) for piece in file_part.split("*")))

        # the directory part may or may not end in a slash; make sure exactly
        # one separator sits between it and the name we found.
        if directory_part.endswith("/"):
            prefix = directory_part
        else:
            prefix = directory_part + "/"

        # make sure that we match against the whole string.
        to_return.extend(prefix + possible_name
                         for possible_name in files_found
                         if matcher.fullmatch(possible_name))

    return to_return
106
107
108
109
# reports if two file names are the same file.
def same_file(file1: str, file2: str) -> bool:
    """
    returns True when both names refer to the same file, judged by the
    inode number and device number that os.stat() reports for each.

    returns False if they differ, or if either name cannot be examined
    (for instance because it does not exist).
    """
    try:
        f1_stat = os.stat(file1)
        f2_stat = os.stat(file2)
    except (OSError, ValueError):
        # a file we can't stat can't be shown to be the same as anything.
        return False
    return (f1_stat.st_ino == f2_stat.st_ino) and (f1_stat.st_dev == f2_stat.st_dev)
118
119
120
# splits a filename into a directory and file specification.
def split_filename(pathname: str) -> tuple:
    """
    returns a two item tuple of (directory part, file part) for the name.

    the name is first cleaned up by remove_trailing_slashes(), canonicalize()
    and patch_name_for_pc(), in that order.  then:
      - a name with a slash in it yields os.path.dirname() and
        os.path.basename() of the cleaned name, where an empty file part is
        replaced by ".".
      - "." and ".." yield themselves with an empty file part.
      - any other name yields "./" and the name itself.
    """
    chewed_name = remove_trailing_slashes(pathname)
    chewed_name = canonicalize(chewed_name)
    chewed_name = patch_name_for_pc(chewed_name)

    if '/' in chewed_name:
        # there's a slash in there.
        directory_part = os.path.dirname(chewed_name)
        file_part = os.path.basename(chewed_name)
        if not file_part:
            # if there was no file specification, just add a non-matching spec.
            file_part = '.'
        return directory_part, file_part

    if chewed_name in (".", ".."):
        # simple comparison to the current or parent directory.
        return chewed_name, ""

    # no slash in this name, so we fix that and also make sure we match
    # the whole name.
    return "./", chewed_name
144
145
146
147#hmmm: kind of legacy to provide our own dirname and basename, but we're
148# just migrating this code right now, not perfecting it.
149
def dirname(pathname: str) -> str:
    """returns the directory part of the filename, as computed by split_filename()."""
    directory_part, _ = split_filename(pathname)
    return directory_part
153
def basename(pathname: str) -> str:
    """
    returns the base part of the filename, as computed by split_filename();
    this omits any directories.
    """
    _, file_part = split_filename(pathname)
    return file_part
157
# returns the extension found on the filename, if any.
def extension(pathname: str) -> str:
    """
    returns everything from the last period in the base name (see basename())
    through the end of it; the period itself is included.  returns an empty
    string when the base name has no period in it.
    """
    base = basename(pathname)
    # rfind works backwards from the end of the base name.
    found = base.rfind('.')
    if found >= 0:
        return base[found:]
    return ""  # no extension seen.
173
def non_extension(pathname: str) -> str:
    """
    returns the portion of the filename without the extension.

    the name is cleaned up with remove_trailing_slashes(), canonicalize() and
    patch_name_for_pc() first; then as many characters as extension() reports
    for the cleaned name are dropped from its end.
    """
    cleaned = patch_name_for_pc(canonicalize(remove_trailing_slashes(pathname)))
    keep_count = len(cleaned) - len(extension(cleaned))
    return cleaned[:keep_count]
182
183
184
# removes all directory slashes (either '/' or '\') from the end of a string.
def remove_trailing_slashes(pathname: str) -> str:
    """
    returns the name with any run of '/' or '\\' characters removed from its end.

    the first character of the name is never removed, so a name that consists
    only of slashes (such as the root directory "/") keeps its first slash.
    """
    # the first character is set aside because the scan for slashes stops
    # before reaching it.
    first_char = pathname[:1]
    remainder = pathname[1:]
    return first_char + remainder.rstrip("/\\")
199
200
201
# returns the proper directory separator for this platform.  this requires
# an environment variable called "OS" for non-Unix operating systems.  the
# valid values for that are shown below.
def directory_separator() -> str:
    """
    returns a backslash when the "OS" environment variable names one of the
    pc style systems listed below, and a forward slash otherwise (including
    when "OS" is not set at all).
    """
    if os.environ.get("OS", "") in ("Windows_NT", "Windows_95", "DOS", "OS2"):
        return "\\"
    return "/"
209
210
211
212# these mutate the directory slashes in a directory name.
213
def canonicalize(pathname: str) -> str:
    """
    the one we use most frequently; it hands the name to canonicalizer()
    with the unix slash as the separator.
    """
    unix_slash = '/'
    return canonicalizer(pathname, unix_slash)
217
# one that turns names into the style native on the current platform.
def native_canonicalize(pathname: str) -> str:
    """
    hands the name to canonicalizer() along with whatever separator
    directory_separator() reports for this platform.
    """
    return canonicalizer(pathname, directory_separator())
221
def pc_canonicalize(pathname: str) -> str:
    """
    one that explicitly uses pc style back-slashes; it hands the name to
    canonicalizer() with a backslash as the separator.
    """
    pc_slash = '\\'
    return canonicalizer(pathname, pc_slash)
225
def unix_canonicalize(pathname: str) -> str:
    """
    one that explicitly does unix style forward slashes; it hands the name
    to canonicalizer() with a forward slash as the separator.
    """
    unix_slash = '/'
    return canonicalizer(pathname, unix_slash)
229
230
# this more general routine gets a directory separator passed in.  it then
# replaces all the separators with that one.
def canonicalizer(directory_name: str, dirsep: str) -> str:
    """
    returns the directory name rewritten to use only 'dirsep' as its separator.

    the steps, in order:
      1. if the "OS" environment variable contains "win" (in any case), a
         leading "X:/" or "X:\\" drive specification is rewritten as "/X/"
         when the "IS_MSYS" environment variable is non-empty, or as
         "/cygdrive/X/" otherwise.
      2. every '/' and '\\' is replaced by dirsep.
      3. runs of separators are squeezed down to a single separator, except
         that the very first character is left out of the squeezing, so a
         name can still start with two separators (which could be a UNC
         filename).
      4. a name that is only a drive letter and colon ("X:") gets a '/' added.

    NOTE(review): OS and IS_MSYS are read from the process environment here;
    the earlier code referred to them as bare names, so confirm that the
    feisty meow startup scripts export both.
    """
    # somewhat abbreviated check; only catches windoze systems, not dos or os2.
    if re.search("win", os.environ.get("OS", ""), re.IGNORECASE):
        # IS_MSYS is calculated by feisty meow scripts startup; it will be
        # non-empty if this is the msys tool kit.
        if os.environ.get("IS_MSYS", ""):
            # msys utilities version (http://www.mingw.org)
            drive_root = "/"
        else:
            # cygwin utilities version (http://www.cygwin.com)
            drive_root = "/cygdrive/"
        # a function is used for the replacement so that nothing in the name
        # gets treated as a regular expression escape.
        directory_name = re.sub(
            r"^(.):[\\/](.*)$",
            lambda found: drive_root + found.group(1) + "/" + found.group(2),
            directory_name)

    # turn all the non-default separators into the default.
    directory_name = "".join(
        dirsep if character in "\\/" else character
        for character in directory_name)

    # remove all occurrences of double separators except for the first
    # double set, which could be a UNC filename.  leaving the first character
    # out of the squeezing is what preserves that first double set.
    if dirsep:
        first_char = directory_name[:1]
        remainder = directory_name[1:]
        doubled = dirsep + dirsep
        while doubled in remainder:
            remainder = remainder.replace(doubled, dirsep)
        directory_name = first_char + remainder

    if re.fullmatch(r".:", directory_name):
        # fix a dos style directory that's just X:, since we don't want the
        # current directory to be used on that device.  that's too random.
        # instead, we assume they meant the root of the drive.
        directory_name = directory_name + "/"

    return directory_name
288
289
290
# fixes a PC directory name if it is only a drive letter plus colon.
def patch_name_for_pc(name: str) -> str:
    """
    returns the name with a '/' added when it is exactly two characters long
    and the second one is a colon; any other name comes back unchanged.
    """
    # check whether the string needs patching.
    if len(name) == 2 and name[1] == ":":
        # name is currently in feeble form of "X:"; fix it.
        return name + '/'
    return name
307
308
309
# these are endings that we consider unimportant.  where a caret is used
# at the front, we will match only the whole string.  backslashes are
# used before periods to ensure we match a real period character.
_JUNK_FILE_PATTERNS = (
    "~", r"^\.#.*", r"^\._.*", r"\.aps", r"\.bak",
    "^binaries", "^bin.ant", "^bin.eclipse",
    r"\.clw", r"^cpdiff_tmp\.txt", r"^\.ds_store", r"^diffs\.txt",
    r"^diff_tmp\.txt", r"\.dsp", r"\.dsw", r"\.gid", r"gmon\.out", r"\.isr",
    r"^isconfig\.ini", r"\.log", "^manifest.txt", "^obj",
    r"\.obj", r"\.output", r"\.plg", "^RCa.*", "^Release", r"\.res",
    r"\.sbr", ".*scc", r"^Setup\.dbg", r"^Setup\.inx",
    r"^Setup\.map", r"^Setup\.obs", "^Selenium_.*Login.html",
    r"\.stackdump", r"^string1033\.txt", r"\.suo", r"\.swp",
    "^thumbs.db", r"[a-zA-Z0-9]\.tmp", r"^trans\.tbl", r"\.user", r"_version\.h",
    r"_version\.rc", "^waste", r"\.ws4", r"\.wsm",
)

# each pattern is tied to the end of the name and matched without regard to
# case.  they are compiled once here rather than on every call.
_JUNK_FILE_MATCHERS = tuple(
    re.compile(pattern + "$", re.IGNORECASE) for pattern in _JUNK_FILE_PATTERNS)


# tells whether a filename is important or not.  the unimportant category
# can usually be safely ignored or deleted.
def important_filename(pathname: str) -> bool:
    """
    returns False when the base name of the file (see basename()) matches one
    of the junk file patterns, and True for anything else.
    """
    name = basename(pathname)
    # a match on any pattern means the file is unimportant.
    return not any(matcher.search(name) for matcher in _JUNK_FILE_MATCHERS)
339
340
341
def sanitize_name(pathname: str) -> str:
    """
    returns the name after it has been passed through canonicalize(), then
    remove_trailing_slashes(), then patch_name_for_pc().
    """
    return patch_name_for_pc(remove_trailing_slashes(canonicalize(pathname)))
347
348
349
def get_drive_letter(pathname: str) -> str:
    """
    returns the first character of the name when it is a letter in a-z or A-Z
    and is immediately followed by a colon; otherwise returns an empty string.
    """
    first_char = pathname[:1]
    # isascii() keeps this to plain a-z and A-Z rather than any unicode letter.
    if first_char.isascii() and first_char.isalpha() and pathname[1:2] == ":":
        return first_char
    return ""
357
358
359
def remove_drive_letter(pathname: str) -> str:
    """
    returns the name without its first two characters when those are a letter
    in a-z or A-Z followed by a colon; otherwise returns the name unchanged.
    """
    first_char = pathname[:1]
    # isascii() keeps this to plain a-z and A-Z rather than any unicode letter.
    if first_char.isascii() and first_char.isalpha() and pathname[1:2] == ":":
        return pathname[2:]
    return pathname
367
368
369
370# these return their argument with the case flipped to lower or upper case.
371
# maps only the letters A-Z onto a-z; every other character is left alone.
_UPPER_TO_LOWER = str.maketrans(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")


def lower(name: str) -> str:
    """
    returns the name with the letters A-Z flipped to lower case.  characters
    outside of A-Z, including accented capitals, are not changed.
    """
    return name.translate(_UPPER_TO_LOWER)
377
# maps only the letters a-z onto A-Z; every other character is left alone.
_LOWER_TO_UPPER = str.maketrans(
    "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ")


def upper(name: str) -> str:
    """
    returns the name with the letters a-z flipped to upper case.  characters
    outside of a-z, including accented lower case letters, are not changed.
    """
    return name.translate(_LOWER_TO_UPPER)
383
384
385
# recursively deletes each directory that is passed as a parameter.
# from http://developer.novell.com/wiki/index.php/Recursive_Directory_Remove
def recursive_delete(*dirs: str) -> None:
    """
    removes each directory given, along with everything underneath it.

    if one of the parameters turns out to be a plain file, a message is
    printed and the function returns right away without looking at the rest
    of the parameters; the file itself is not removed.  a directory that
    cannot be listed is skipped.  a directory that cannot be removed at the
    end (for instance because something inside could not be deleted) causes
    an error message to be printed rather than an exception.

    symbolic links found inside a directory are removed as links; the things
    they point at are never descended into or deleted.
    """
    for directory in dirs:
        if os.path.isfile(directory):
            print("this is not a dir: " + directory + " => should whack it here?")
            return

        # if we can't open the dir, just skip to the next one.
        try:
            entries = os.listdir(directory)
        except OSError:
            continue

        for entry in entries:
            path = directory + "/" + entry
            if os.path.islink(path) or os.path.isfile(path):
                try:
                    os.unlink(path)
                except OSError:
                    # keep going; the rmdir below reports the failure.
                    pass
            elif os.path.isdir(path):
                recursive_delete(path)

        try:
            os.rmdir(directory)
        except OSError as problem:
            print("error - " + str(problem))
409
410
411
# finds any directories under the arguments, which can be a list of directories.
def find_directories(*dirs: str) -> list:
    """
    returns a list holding every directory found directly inside of the
    directories given (one level down only).  each name is returned as the
    directory it was found in, a '/', and the entry's own name.  a directory
    that cannot be listed contributes nothing.
    """
    dirs_found = []
    for directory in dirs:
        # if we can't open the dir, just skip to the next one.
        try:
            entries = os.listdir(directory)
        except OSError:
            continue
        # os.listdir never reports the current or parent dir, so there is
        # nothing to skip for those.
        for entry in entries:
            path = directory + "/" + entry
            # skip if this entry is not itself a directory.
            if os.path.isdir(path):
                dirs_found.append(path)
    return dirs_found
432
433
434
# given a list of paths, this returns an array of all the filenames found therein.
def find_files(*paths: str) -> list:
    """
    returns a list of the files found at the paths given.  a path that is
    itself a file is added as is.  any other path is listed as a directory,
    and each plain file directly inside it is added as the path, a '/', and
    the file's own name.  subdirectories are not searched, and a path that
    cannot be listed contributes nothing.
    """
    files_found = []
    for place in paths:
        if os.path.isfile(place):
            # that's actually just a file, so add it.
            files_found.append(place)
            continue
        # if we can't open the dir, just skip to the next one.
        try:
            entries = os.listdir(place)
        except OSError:
            continue
        for entry in entries:
            path = place + "/" + entry
            # skip if this entry is not a file.
            if os.path.isfile(path):
                files_found.append(path)
    return files_found
460
461
462
# finds all directories starting at a particular directory and returns them
# in an array.  does not include the starting directory.
def recursive_find_directories(*dirs: str) -> list:
    """
    returns a list of every directory found anywhere underneath the
    directories given.  the directories directly inside the starting points
    come first, followed by the ones inside those, and so on down the tree;
    find_directories() does the searching at each level.
    """
    to_return = []
    # first find all the directories within the parameters.
    current_level = find_directories(*dirs)
    # we only keep going if there's something to chew on in our directory
    # list.  otherwise, we've hit the bottom of that tree.
    while current_level:
        to_return.extend(current_level)
        current_level = find_directories(*current_level)
    return to_return
481
482
483
4841;
485
#define stat
Definition Xos2defs.h:41
bool same_file(str file1, str file2)
list glob_list(list original_names)
#hmmm: make this lower-level, a script that is inherited by all perl scripts.
str native_canonicalize(str pathname)
str basename(str pathname)
str dirname(str pathname)
str pc_canonicalize(str pathname)
list split_filename(str pathname)
str canonicalizer(str directory_name, str dirsep)
str canonicalize(str pathname)
str unix_canonicalize(str pathname)
str remove_trailing_slashes(str pathname)
str non_extension(str pathname)
str extension(str pathname)