feisty meow concerns codebase 2.140
fix_project_references.py
Go to the documentation of this file.
1#!/usr/bin/python
2
3import os;
4import random;
5
7 """ This tool fixes visual studio 2010 projects to have the proper project references.
8
9 Project files need to refer to other project files that they are dependent on if visual
10 studio is to build them properly. This is a painful task when done manually, but luckily
11 this script automates the process for you.
12 It requires an environment variable called BUILD_TOP that points at the top of all files
13 included in a code base. This is used to find the dependent projects.
14
15
16hmmm: this tool is NOT finished.
17 """
18
19
20
def __init__(self, argv):
    """Remember the command line and catalogue the projects in the code base.

    The arguments are stored untouched in self.arguments.  The top of the
    source hierarchy is taken from the BUILD_TOP environment variable and
    handed to walk_directory_for_projects(); locate_all_assets() then builds
    the dictionary of assets created by the projects that were found.
    """
    # the command line is kept for later; file_buffer is scratch space for file i/o.
    self.arguments, self.file_buffer = argv, ""
    # hmmm: the hierarchy top is denoted by BUILD_TOP for now; may want to document that.
    # NOTE(review): os.getenv() gives None when BUILD_TOP is unset; that value is
    # passed straight through to the directory walk.
    hierarchy_top = os.getenv("BUILD_TOP")
    # projects and assets are intended to keep in step: each project creates one asset.
    self.projects = self.walk_directory_for_projects(hierarchy_top)  # list of project files.
    self.assets = self.locate_all_assets()  # dictionary of assets created by project files.
34
35
36
37#fix
39 """ Shows the instructions for using this class. """
40 print("""
41This script will replace all occurrences of a phrase you specify in a set of files. The
42replacement process will be careful about C and C++ syntax and will not replace occurrences
43within comments or which are not "complete" phrases (due to other alpha-numeric characters
44that abut the phrase). The arguments to the script are:
45
46 {0}: PhraseToReplace ReplacementPhrase File1 [File2 ...]
47
48For example, if the phrase to replace is Goop, it will be replaced in these contexts:
49 Goop[32]
50 molo-Goop
51 *Goop
52but it will not be found in these contexts:
53 // doop de Goop
54 rGoop
55 Goop23
56""".format(self.arguments[0]))
57
58 #fix
60 """ Performs command line argument handling. """
61 arg_count = len(self.arguments)
62# for i in range(1, arg_count):
63# print("i is {0}, arg is {1}".format(i, self.arguments[i]))
64 # we need more than 2 arguments, since there needs to be at least one file also.
65 if arg_count < 4:
66 return False
69 print("got phrase to replace: \'{0}\' and replacement: \'{1}\'".format(self.phrase_to_replace, self.replacement_bit))
70 self.files = self.arguments[3:]
71 return True
72
73
74
75#hmmm: are these good enough to become part of a file library?
76
def read_file_data(self, filename):
    """Load filename into self.file_buffer and split it into self.file_lines.

    Returns True on success.  When the file cannot be opened or read, a message
    is printed and False is returned; self.file_lines is not refreshed then.
    """
    # NOTE(review): the file is opened in binary mode, so on Python 3 the buffer
    # and the lines are bytes rather than text -- confirm the callers expect that.
    try:
        with open(filename, "rb") as handle:
            try:
                self.file_buffer = handle.read()
            except IOError:
                print("There was an error reading the file {0}".format(filename))
                return False
    except IOError:
        print("There was an error opening the file {0}".format(filename))
        return False
    self.file_lines = self.file_buffer.splitlines()
    return True
93
def write_file_data(self, filename):
    """Write self.processed_buffer out to filename, replacing its contents.

    Returns True on success.  When the file cannot be opened or written, a
    message is printed and False is returned.  self.file_buffer is left alone.
    """
    # output_filename = filename + ".new"  # safe testing version.
    output_filename = filename
    payload = self.processed_buffer
    # the file is opened in binary mode, so a text buffer has to be encoded
    # first; a buffer that is already bytes is written untouched.
    # NOTE(review): UTF-8 is assumed for the encoding -- confirm for this code base.
    if not isinstance(payload, bytes):
        payload = payload.encode("utf-8")
    try:
        with open(output_filename, "wb") as our_file:
            try:
                # the result of write() is deliberately not stored anywhere; it
                # used to overwrite self.file_buffer with a meaningless value.
                our_file.write(payload)
            except IOError:
                print("There was an error writing the file {0}".format(output_filename))
                return False
    except IOError:
        print("There was an error opening the file {0}".format(output_filename))
        return False
    return True
111
112
113
114 #unused?
def is_alphanumeric(self, check_char):
    """Return True if the first character of check_char is a-z, A-Z, 0-9 or an underscore."""
    first = check_char[0]
    return (
        first == "_"
        or "a" <= first <= "z"
        or "A" <= first <= "Z"
        or "0" <= first <= "9"
    )
126
127 #unused?
def replace_within_string(self, fix_string):
    """Replace each stand-alone occurrence of self.phrase_to_replace in fix_string.

    An occurrence is only replaced (with self.replacement_bit) when neither the
    character before it nor the character after it is alphanumeric according to
    self.is_alphanumeric(); the start and end of the string count as separators.
    The processed string is returned.  An empty phrase leaves the string unchanged.
    """
    phrase = self.phrase_to_replace
    if not phrase:
        # an empty phrase matches at every position and would never terminate.
        return fix_string
    replacement = self.replacement_bit
    indy = 0
    while indy < len(fix_string):
        # locate next occurrence of the phrase, if any.
        indy = fix_string.find(phrase, indy)
        if indy < 0:
            break
        phrase_end = indy + len(phrase)
        # "?" is a simple default that won't fail our separator check.
        char_before = fix_string[indy - 1] if indy > 0 else "?"
        # any character that exists after the phrase must be inspected, including
        # the very last character of the string.
        char_after = fix_string[phrase_end] if phrase_end < len(fix_string) else "?"
        if not self.is_alphanumeric(char_before) and not self.is_alphanumeric(char_after):
            # this is a properly separated occurrence, so swap it out.
            fix_string = fix_string[:indy] + replacement + fix_string[phrase_end:]
            # resume after the inserted text so the replacement is never rescanned.
            indy += len(replacement)
        else:
            # not a complete phrase here; skip forward one character.
            indy += 1
    return fix_string  # give back processed form.
154
155 #unused?
157 """ iterates through the stored version of the file and replaces the phrase. """
158 self.state = self.EATING_NORMAL_TEXT;
159 # clear out any previously processed text.
160 self.processed_buffer = "" # reset our new version of the file contents.
163 # iterate through the file's lines.
164 contents = self.file_lines
165 while (len(contents) > 0):
166 # get the next line out of the input.
167 next_line = contents[0]
168 # drop that line from the remaining items.
169 contents = contents[1:]
170# print("next line: {0}".format(next_line))
171 # decide if we need a state transition.
172 indy = 0
173 if ((len(next_line) > 0) and (self.state == self.EATING_NORMAL_TEXT) and ('/' in next_line)):
174 # loop to catch cases where multiple slashes are in line and one IS a comment.
175 while (indy < len(next_line)):
176 # locate next slash, if any.
177 indy = next_line.find('/', indy)
178 if (indy < 0):
179 break
180 if ((len(next_line) > indy + 1) and (next_line[indy + 1] == '/')):
181 # switch states and handle any pent-up text.
182 self.normal_accumulator += next_line[0:indy] # get last tidbit before comment start.
183 next_line = next_line[indy:] # keep only the stuff starting at slash.
184 self.state = self.EATING_ONELINE_COMMENT
185# print("state => oneline comment")
186 self.emit_normal_accumulator()
187 break
188 if ((len(next_line) > indy + 1) and (next_line[indy + 1] == '*')):
189 # switch states and deal with accumulated text.
190 self.normal_accumulator += next_line[0:indy] # get last tidbit before comment start.
191 next_line = next_line[indy:] # keep only the stuff starting at slash.
192 self.state = self.EATING_MULTILINE_COMMENT
193# print("state => multiline comment")
194 self.emit_normal_accumulator()
195 break
196 indy += 1 # no matches means we have to keep skipping forward.
197
198 # now handle things appropriately for our current state.
199 if (self.state == self.EATING_NORMAL_TEXT):
200 # add the text to the normal accumulator.
201# print("would handle normal text")
202 self.normal_accumulator += next_line + "\n"
203 elif (self.state == self.EATING_ONELINE_COMMENT):
204 # save the text in comment accumulator.
205# print("would handle oneline comment")
206 self.comment_accumulator += next_line + "\n"
207 self.emit_comment_accumulator()
208 self.state = self.EATING_NORMAL_TEXT
209 elif (self.state == self.EATING_MULTILINE_COMMENT):
210 # save the text in comment accumulator.
211# print("would handle multiline comment")
212 self.comment_accumulator += next_line + "\n"
213 # check for whether the multi-line comment is completed on this line.
214 if ("*/" in next_line):
215# print("found completion for multiline comment on line.")
216 self.emit_comment_accumulator()
217 self.state = self.EATING_NORMAL_TEXT
218 # verify we're not in the wrong state still.
219 if (self.state == self.EATING_MULTILINE_COMMENT):
220 print("file seems to have unclosed multi-line comment.")
221 # last step is to spit out whatever was trailing in the accumulator.
222 self.emit_normal_accumulator()
223 # if we got to here, we seem to have happily consumed the file.
224 return True
225
226 #use for example.
228 """ Orchestrates the process of replacing the phrases. """
229 # process our command line arguments to see what we need to do.
230 try_command_line = self.validate_and_consume_command_line()
231 if (try_command_line != True):
232 print("failed to process the command line...\n")
233 self.print_instructions()
234 exit(1)
235 # iterate through the list of files we were given and process them.
236 for i in range(0, len(self.files)):
237 print("file {0} is \'{1}\'".format(i, self.files[i]))
238 worked = self.read_file_data(self.files[i])
239 if (worked is False):
240 print("skipping since file read failed on: {0}".format(self.files[i]))
241 continue
242# print("{0} got file contents:\n{1}".format(self.files[i], self.file_lines))
243 worked = self.process_file_data()
244 if (worked is False):
245 print("skipping, since processing failed on: {0}".format(self.files[i]))
246 continue
247 worked = self.write_file_data(self.files[i])
248 if (worked is False):
249 print("writing file back failed on: {0}".format(self.files[i]))
250 print("finished processing all files.")
251
252
253
255 """ the main entry point to the project fixing process.
256
257 Operates on one project file at a time by:
258 1) finding all libraries (lib files) used by the project A,
259 2) locating the external project that creates each lib file,
260 3) adding a reference to the external projects to our project A.
261
262 We rely on some important assumptions to get this done:
263 1) project names and project file names are unique across the codebase,
264 2) the structure of the source code hierarchies uses a compatible architecture,
265(which is?)
266
267 """
268 print("repair is unimplemented")
269
270
271
def extract_xml_tag(self, file_line, tag_name):
    """Return the text between <tag_name ...> and </tag_name> on one line.

    This assumes the start tag, contents and end tag are all on the same line
    of text (which is not a very good assumption in general).  An empty string
    is returned when the tag is absent or incomplete on the line.
    """
    opener = '<' + tag_name
    tag_pos = file_line.find(opener)
    while tag_pos >= 0:
        name_end = tag_pos + len(opener)
        # the tag name has to end right here; otherwise we only matched the
        # front of a longer tag name and must keep looking.
        if name_end < len(file_line) and (
            file_line[name_end] == '>' or file_line[name_end].isspace()
        ):
            break
        tag_pos = file_line.find(opener, name_end)
    if tag_pos < 0:
        return ""  # failed to find anything relevant.
    # find the greater-than character that closes this start tag, searching
    # from the tag itself so earlier tags on the line are ignored.
    gt_pos = file_line.find('>', tag_pos)
    if gt_pos < 0:
        return ""  # fail.
    tag_close_pos = file_line.find('</' + tag_name, gt_pos + 1)
    if tag_close_pos < 0:
        return ""  # fail.
    return file_line[gt_pos + 1:tag_close_pos]
286
def extract_xml_attribute(self, file_line, tag_name, attribute_name):
    """Return the double-quoted value of attribute_name inside the tag_name tag.

    Only the part of the line after the start of the tag is searched, and the
    attribute name must be a whole name: preceded by whitespace and followed by
    an equals sign.  An empty string is returned when the tag, the attribute or
    its quotes cannot be found on the line.
    """
    tag_pos = file_line.find('<' + tag_name)
    if tag_pos < 0:
        return ""
    search_from = tag_pos + 1 + len(tag_name)
    while True:
        attrib_pos = file_line.find(attribute_name, search_from)
        if attrib_pos < 0:
            return ""
        name_end = attrib_pos + len(attribute_name)
        # skip hits that are just the tail of a longer attribute name or that
        # sit inside some other attribute's value.
        if file_line[attrib_pos - 1].isspace() and file_line[name_end:].lstrip().startswith('='):
            break
        search_from = name_end
    # find the first quote around the attribute value.
    quote_pos = file_line.find('"', name_end)
    if quote_pos < 0:
        return ""  # fail.
    quote_close_pos = file_line.find('"', quote_pos + 1)
    if quote_close_pos < 0:
        return ""  # fail.
    return file_line[quote_pos + 1:quote_close_pos]
300
301
302
def extract_xml_tag_from_file(self, filename, tag_name):
    """Read filename and return the contents of the first tag_name tag found.

    Each line is handed to extract_xml_tag(), so the tag must sit on a single
    line.  An empty string is returned when the file cannot be read or when no
    line yields a value.

    hmmm: may not want a file read here; better to have a nice established way
    of dealing with the existing buffer.
    """
    # a failed read leaves the lines of some earlier file in place, so bail out
    # rather than report a value that belongs to a different file.
    if not self.read_file_data(filename):
        return ""
    for next_line in self.file_lines:
        # hmmm: maybe bad assumption thinking all on one line...
        found = self.extract_xml_tag(next_line, tag_name)
        if found != "":
            return found
    return ""  # failed to find anything relevant.
320
321
322
324 """ reads in a visual studio project file and figures out that project's GUID.
325
326 note that this will fail horribly if the project has been messed with and is no
327 longer in microsoft's official format.
328 """
329 return self.extract_xml_tag_from_file(filename, 'ProjectGuid')
330
332 """ given a ProjectReference line, this pulls out just the filename involved.
333 """
334 return self.extract_xml_attribute(file_line, "ProjectReference", "Include")
335
def find_all_project_references(self, filename):
    """Read a visual studio project file and return every project reference in it.

    Each line is passed to extract_filename_from_project_reference(); the
    non-empty results are returned as a list, in file order.  An empty list is
    returned when the file cannot be read.
    """
    # a failed read leaves the lines of some earlier file in place; do not
    # report that file's references as belonging to this one.
    if not self.read_file_data(filename):
        return []
    refs_list = []
    for next_line in self.file_lines:
        ref = self.extract_filename_from_project_reference(next_line)
        if ref != '':
            refs_list.append(ref)
    return refs_list
352
def parse_dependency_line(self, line):
    """Return the libs listed on an AdditionalDependencies line.

    The contents of the AdditionalDependencies tag are split on semicolons and
    the '%(AdditionalDependencies)' placeholder is dropped wherever it appears.
    An empty list is returned when the line has no such tag contents.
    """
    just_libs = self.extract_xml_tag(line, "AdditionalDependencies")
    if just_libs == "":
        return []
    # build a fresh list instead of deleting during an index loop, which used to
    # skip the first entry and run off the end of the shortened list.
    return [lib for lib in just_libs.split(';') if lib != '%(AdditionalDependencies)']
365
366 #hmmm: could be a problem if the debug and release values differ.
def extract_dependencies(self, filename):
    """Read a visual studio project file and return the libs it depends on.

    The first line mentioning AdditionalDependencies is parsed with
    parse_dependency_line() and its result returned.  These are the lib files
    we need to link up to their providing projects, if they're actually things
    that we build.  An empty tuple is returned when the file cannot be read or
    has no such line.

    hmmm: could be a problem if the debug and release values differ, since
    only the first matching line is used.
    """
    # a failed read leaves the lines of some earlier file in place; do not
    # report that file's dependencies as belonging to this one.
    if not self.read_file_data(filename):
        return ()
    for next_line in self.file_lines:
        if 'AdditionalDependencies' in next_line:
            return self.parse_dependency_line(next_line)
    return ()  # failed to find right line.
385
386
387
388#hmmm: could also be a problem if the debug and release values differ.
def find_asset_created(self, filename):
    """Determine the asset created by a visual studio project file.

    The asset name is the contents of the first RootNamespace tag plus an
    extension chosen from the first ConfigurationType (or OutputType) tag:
    DynamicLibrary and Library give ".dll", StaticLibrary gives ".lib", and
    Application, WinExe and Exe give ".exe".  An empty string is returned for
    Utility projects, for unknown types (after printing a message), when
    either value is missing, or when the file cannot be read.

    This probably only works right on c++ projects.
    hmmm: could also be a problem if the debug and release values differ.
    """
    extensions = {
        "DynamicLibrary": ".dll",
        "Library": ".dll",
        "StaticLibrary": ".lib",
        "Application": ".exe",
        "WinExe": ".exe",
        "Exe": ".exe",
    }

    # a failed read leaves the lines of some earlier file in place; do not
    # report that file's asset as belonging to this one.
    if not self.read_file_data(filename):
        return ""

    # these will need to be filled for us to have correctly determined what the project creates.
    project_name = ""
    config_type = ""
    for next_line in self.file_lines:
        # hmmm: csproj seems like they will work with this scheme already.
        if project_name == "":
            project_name = self.extract_xml_tag(next_line, "RootNamespace")
        if config_type == "":
            config_type = (self.extract_xml_tag(next_line, "ConfigurationType")
                           or self.extract_xml_tag(next_line, "OutputType"))
        if project_name != "" and config_type != "":
            break  # both breadcrumbs found; no need to read further.

    # the decision is made after the scan, so a file that ends on the line
    # supplying the last missing value still gets an answer.
    if project_name == "" or config_type == "":
        return ""  # failed to find right lines.
    if config_type == "Utility":
        return ""
    extension = extensions.get(config_type)
    if extension is None:
        print("unknown configuration type: " + config_type + "\nin proj file: " + filename)
        return ""
    # we think we're successful in figuring out what should be created.
    return project_name + extension
447
448
449
451 """ traverses the directory in "dir" and finds all the project files.
452
453 the project files are returned as a massive list.
454 """
455
456#hmmm: important optimization for walk; if the file where we store these things exists,
457# read directly out of that file instead of redoing hierarchy. people can
458# delete the file when they want a fresh look.
459
460 to_return = []
461
462 for root, dirs, files in os.walk(dir):
463#demo code from web:
464# print root, "consumes",
465# print sum(os.path.getsize(os.path.join(root, name)) for name in files),
466# print "bytes in", len(files), "non-directory files"
467 if 'CVS' in dirs:
468 dirs.remove('CVS') # don't visit CVS directories.
469 elif '.svn' in dirs:
470 dirs.remove('.svn') # don't visit subversion directories either.
471 for curr_file in files:
472 indy = len(curr_file) - 4
473 # see if the file has the right ending to be a visual studio project file.
474 if curr_file[indy:].lower() == 'proj':
475 full_path = os.path.join(root, curr_file)
476# print("full path: " + full_path)
477 # now add this good one to the returned list of projects.
478 to_return.append(full_path)
479 return to_return
480
def find_relative_path(self, our_path, project_file):
    """Calculate the path from directory our_path to the directory of project_file.

    This assumes that the locations are actually rooted at the same place;
    otherwise there is no path between the locations.  our_path is the source
    directory we start from; project_file is a file whose containing directory
    is the target.  Backslashes are accepted and the result uses forward
    slashes.  A trailing slash on our_path, or an empty our_path, does not add
    a spurious "../" step.
    """
    # canonicalize these to be linux paths so we can split on forward slashes.
    # trailing slashes are dropped since they would create an empty component.
    source_text = our_path.replace('\\', '/').rstrip('/')
    source_parts = source_text.split('/') if source_text else []
    target_parts = project_file.replace('\\', '/').split('/')
    # remove last item, which should be the project filename.
    target_parts.pop()
    # count the leading components the two paths have in common.
    shared = 0
    while (shared < len(source_parts) and shared < len(target_parts)
           and source_parts[shared] == target_parts[shared]):
        shared += 1
    # we compute the directory prefix of dot-dots based on the number of
    # elements left in the source path.
    prefix = "../" * (len(source_parts) - shared)
    print("calculated a prefix of: " + prefix)
    return prefix + "/".join(target_parts[shared:])
511
512
513
515 """ locates every project file in our list and determines the asset created by it.
516
517 this returns a dictionary of {asset=project} items. we index by asset way more frequently
518 than by project, so the asset name is used as our key.
519 """
520 to_return = {}
521 for proj in self.projects:
522 asset_found = self.find_asset_created(proj)
523 # make sure we don't record a bogus project with no useful asset.
524 if (asset_found == ""):
525 self.projects.remove(proj) # should be safe for our list iteration.
526 continue # skip adding the bogus one.
527 to_return[asset_found] = proj
528# print("proj " + proj + " creates: " + asset_found)
529 return to_return
530
531
532
def locate_referenced_projects(self, project):
    """Return the project files that create the libraries used by "project".

    Every dependency reported by extract_dependencies() is looked up in
    self.assets.  A name ending in ".lib" that is not a known asset gets a
    second chance with the ending swapped for ".dll".  Dependencies that match
    nothing are skipped.
    """
    providers = []
    for lib_name in self.extract_dependencies(project):
        # the name as given is tried first; a .lib gets a .dll fallback.
        candidates = [lib_name]
        if lib_name.endswith(".lib"):
            candidates.append(lib_name[:-4] + ".dll")
        for candidate in candidates:
            if candidate in self.assets:
                # this item exists and is created by one of our projects.
                providers.append(self.assets[candidate])
                break
    return providers
564
def remove_redundant_references(self, project):
    """Clean out any references in "project" to assets that we intend to update.

    This is meant to modify the file, so it had better be right.  None of it is
    implemented yet: the method currently does nothing and returns None.
    """
    # planned steps:
    #   load file data for the project.
    #   find its references.
    #     see if each reference is one we know about.
    #     if so, zap it out of the file contents.
    #   write the file back out.
    return None
575
576
577
def unit_test(self):
    """A sort-of unit test for the functions in this script.

    Currently geared for manual inspection of the test results: every result is
    printed rather than checked.  The project file examined is the first
    command line argument if one was given, otherwise nechung.vcxproj under the
    directory named by the FEISTY_MEOW_APEX environment variable.
    """
    print("testing some of the methods...")
    test_file = self.arguments[1] if len(self.arguments) > 1 else ""
    if test_file == "":
        # an unset variable gives an empty prefix instead of a TypeError.
        apex = os.getenv("FEISTY_MEOW_APEX", "")
        test_file = apex + "/nucleus/applications/nechung/nechung.vcxproj"
    print("test file is: " + test_file)

    guid = self.extract_guid_from_project_file(test_file)
    print("from proj, got a guid of " + guid)

    refs = self.find_all_project_references(test_file)
    print("refs list is: " + " ".join(refs))

    asset = self.find_asset_created(test_file)
    print("our created asset is: " + asset)

    # the project list comes from this object, not from a module-level global,
    # so the test also works when the class is used from another module.
    if len(self.projects) > 0:
        rando = random.randint(0, len(self.projects) - 1)
        print("index chosen to examine: {0}".format(rando))

        relpath = self.find_relative_path(os.path.dirname(test_file), self.projects[rando])
        print("found relative path from source:\n " + test_file)
        print("to target:\n " + self.projects[rando])
        print("is this =>\n " + relpath)

    full_refs = self.locate_referenced_projects(test_file)
    print("refs found are:\n" + " ".join(full_refs))

    self.remove_redundant_references(test_file)
    print("we just munged your file! go check it! no references should remain that are in our new list.")

    # still needed:
    #   remove any existing references that we now have a replacement for.
    #     base this on the basename of the project file? blah.vcxproj already there e.g.
    #   spit out xml form of references for the dependent projects.
    #   put new references into the right place in file.
620
621#still needed:
622# remove any existing references that we now have a replacement for.
623# base this on the basename of the project file? blah.vcxproj already there e.g.
624# spit out xml form of references for the dependent projects.
625# put new references into the right place in file.
626
627
628
629
630# run the script if we are non-interactive.
if __name__ == "__main__":
    import sys
    # the name "fixit" is kept for this module-level object.
    fixit = fix_project_references(sys.argv)

    # comment this out when script is working.
    fixit.unit_test()

    print("we're bailing before doing anything real...")
    # sys.exit is used rather than the exit() helper, which is only installed
    # by the site module and is intended for the interactive interpreter.
    sys.exit(3)

    # not reached until the early exit above is removed.
    fixit.repair_project_references()
642
643
644
645# parking lot of things to do in future:
646
647
#define open
Definition Xos2defs.h:36
extract_xml_attribute(self, file_line, tag_name, attribute_name)
Useful support functions for unit testing, especially within hoople.
Definition unit_base.cpp:35