class fix_project_references:
    """Fixes Visual Studio 2010 projects to have the proper project references.

    Project files need to refer to the other project files that they depend on
    if Visual Studio is to build them properly.  That is painful to do by hand,
    so this class automates the process.

    It requires an environment variable called BUILD_TOP that points at the top
    of all files included in a code base.  This is used to find the dependent
    projects.

    NOTE(review): the phrase-replacement methods (print_instructions() through
    replace_all_occurrences()) look like carry-overs from a separate
    phrase-replacing script; they are kept so existing callers keep working.
    """

    # parser states used by process_file_data().
    # NOTE(review): these constants and the two emit_*_accumulator() helpers
    # are referenced by process_file_data() but no definition was visible; they
    # are supplied here following the contract that print_instructions()
    # describes (comments are copied verbatim, other text gets the replacement).
    EATING_NORMAL_TEXT = 0
    EATING_ONELINE_COMMENT = 1
    EATING_MULTILINE_COMMENT = 2

    ##############

    def __init__(self, argv):
        """Initializes the class with a set of arguments to work with.

        The arguments need to be in the form described by print_instructions().
        Construction scans the BUILD_TOP hierarchy for project files and reads
        each of them to learn which asset it creates.
        """
        self.arguments = argv  # remembers the command line for us.
        self.file_buffer = ""  # use for file i/o in the class.
        self.file_lines = []  # the lines of the most recently read file.
        self.processed_buffer = ""  # output text produced by process_file_data().
        # the hierarchy top is denoted by the variable BUILD_TOP.
        src_dir = os.getenv("BUILD_TOP")
        if not src_dir:
            print("BUILD_TOP is not set; no project files can be located.")
        # after locate_all_assets(), every remaining project creates an asset.
        self.projects = self.walk_directory_for_projects(src_dir)  # list of project files.
        self.assets = self.locate_all_assets()  # dictionary of assets created by project files.

    ##############

    def print_instructions(self):
        """Shows the instructions for using this class."""
        # NOTE(review): the six example lines below were missing from the
        # listing; they were restored to agree with replace_within_string() and
        # process_file_data() -- confirm against the original text.
        print("""
This script will replace all occurrences of a phrase you specify in a set of files. The
replacement process will be careful about C and C++ syntax and will not replace occurrences
within comments or which are not "complete" phrases (due to other alpha-numeric characters
that abut the phrase). The arguments to the script are:

{0}: PhraseToReplace ReplacementPhrase File1 [File2 ...]

For example, if the phrase to replace is Goop, it will be replaced in these contexts:
  Goop[32]
  molo-Goop
  *Goop
but it will not be found in these contexts:
  // doop de Goop
  rGoop
  Goop23
""".format(self.arguments[0]))

    def validate_and_consume_command_line(self):
        """Performs command line argument handling.

        Returns True and fills in phrase_to_replace, replacement_bit and files
        when enough arguments were supplied; returns False otherwise.
        """
        arg_count = len(self.arguments)
        # we need more than 2 arguments, since there needs to be at least one file also.
        # NOTE(review): the exact comparison was missing from the listing; four
        # entries (script, phrase, replacement, one file) matches the comment.
        if arg_count < 4:
            return False
        self.phrase_to_replace = self.arguments[1]
        self.replacement_bit = self.arguments[2]
        print("got phrase to replace: \'{0}\' and replacement: \'{1}\'".format(self.phrase_to_replace, self.replacement_bit))
        self.files = self.arguments[3:]
        return True

    ##############

    #hmmm: are these good enough to become part of a file library?

    def read_file_data(self, filename):
        """Loads the file into our memory buffer for processing.

        Fills self.file_buffer with the text and self.file_lines with its
        lines.  Returns True on success and False (after printing a message)
        when the file cannot be opened or read; on failure both attributes are
        left empty so that stale lines from an earlier file are never reused.
        """
        self.file_buffer = ""
        self.file_lines = []
        try:
            our_file = open(filename, "rb")
        except IOError:
            print("There was an error opening the file {0}".format(filename))
            return False
        try:
            with our_file:
                raw_bytes = our_file.read()
        except IOError:
            print("There was an error reading the file {0}".format(filename))
            return False
        # latin-1 maps every byte to exactly one character, so the text can be
        # searched with ordinary string operations and later written back
        # byte-for-byte by write_file_data(), whatever the real encoding is.
        self.file_buffer = raw_bytes.decode("latin-1")
        # split only on \n, \r\n and \r; str.splitlines() would also break on
        # characters such as \x85 that can occur inside a line of the file.
        unified = self.file_buffer.replace("\r\n", "\n").replace("\r", "\n")
        self.file_lines = unified.split("\n")
        if self.file_lines and self.file_lines[-1] == "":
            self.file_lines.pop()  # a trailing newline does not start another line.
        return True

    def write_file_data(self, filename):
        """Takes the processed buffer and sends it back out to the filename.

        Returns True on success and False (after printing a message) when the
        file cannot be opened or written.  self.file_buffer is left untouched.
        """
        # output_filename = filename + ".new"  # safe testing version.
        output_filename = filename
        try:
            # encode before opening so a bad character cannot truncate the file.
            payload = self.processed_buffer.encode("latin-1")
        except UnicodeError:
            print("There was an error writing the file {0}".format(output_filename))
            return False
        try:
            our_file = open(output_filename, "wb")
        except IOError:
            print("There was an error opening the file {0}".format(output_filename))
            return False
        try:
            with our_file:
                our_file.write(payload)
        except IOError:
            print("There was an error writing the file {0}".format(output_filename))
            return False
        return True

    ##############

    def is_alphanumeric(self, check_char):
        """Returns True if the first character is a-z, A-Z, 0-9 or an underscore.

        Only the first character of check_char is examined; an empty string
        yields False.
        """
        if not check_char:
            return False
        first = check_char[0]
        return (
            first == "_"
            or "a" <= first <= "z"
            or "A" <= first <= "Z"
            or "0" <= first <= "9"
        )

    def replace_within_string(self, fix_string):
        """Given a string to fix, this replaces all appropriate locations of the phrase.

        An occurrence of self.phrase_to_replace is swapped for
        self.replacement_bit only when the characters directly before and
        after it are not alphanumeric (see is_alphanumeric()); the start and
        end of the string count as acceptable neighbors.  Inserted replacement
        text is never scanned again.  Returns the processed string.
        """
        phrase = self.phrase_to_replace
        if not phrase:
            return fix_string  # an empty phrase would match everywhere.
        indy = 0
        while indy < len(fix_string):
            # locate next occurrence of replacement text, if any.
            indy = fix_string.find(phrase, indy)
            if indy < 0:
                break
            phrase_end = indy + len(phrase)
            # "?" is a simple default that won't fail our check.
            char_before = fix_string[indy - 1] if indy > 0 else "?"
            char_after = fix_string[phrase_end] if phrase_end < len(fix_string) else "?"
            if not self.is_alphanumeric(char_before) and not self.is_alphanumeric(char_after):
                # this looks like a good candidate for replacement.
                fix_string = fix_string[:indy] + self.replacement_bit + fix_string[phrase_end:]
                # resume after the inserted text so it cannot match again.
                indy += len(self.replacement_bit)
            else:
                indy += 1  # not a whole phrase; keep skipping forward.
        return fix_string  # give back processed form.

    def emit_normal_accumulator(self):
        """Runs the replacement over the pending normal text and appends it to the output."""
        self.normal_accumulator = self.replace_within_string(self.normal_accumulator)
        self.processed_buffer += self.normal_accumulator
        self.normal_accumulator = ""

    def emit_comment_accumulator(self):
        """Appends the pending comment text to the output without modifying it."""
        self.processed_buffer += self.comment_accumulator
        self.comment_accumulator = ""

    def _enter_comment_if_started(self, line):
        """Switches state at the first // or /* found in a line of normal text.

        Text before the comment start is flushed through
        emit_normal_accumulator() and the rest of the line, starting at the
        slash, is returned.  A line without a comment start is returned
        unchanged.
        """
        candidates = [
            (position, new_state)
            for position, new_state in (
                (line.find("//"), self.EATING_ONELINE_COMMENT),
                (line.find("/*"), self.EATING_MULTILINE_COMMENT),
            )
            if position >= 0
        ]
        if not candidates:
            return line
        position, new_state = min(candidates)
        self.normal_accumulator += line[:position]  # get last tidbit before comment start.
        self.state = new_state
        self.emit_normal_accumulator()
        return line[position:]  # keep only the stuff starting at slash.

    def process_file_data(self):
        """Iterates through the stored version of the file and replaces the phrase.

        Reads self.file_lines and builds self.processed_buffer, with every
        line terminated by a newline.  Text from a // or /* up to the end of
        that comment is copied untouched.  Limitations kept from the original
        logic: slashes inside string literals are not recognized, and any code
        that follows a closing */ on the same line is treated as comment text.
        Returns True.
        """
        self.state = self.EATING_NORMAL_TEXT
        # clear out any previously processed text.
        self.processed_buffer = ""  # reset our new version of the file contents.
        self.normal_accumulator = ""
        self.comment_accumulator = ""
        for next_line in self.file_lines:
            # decide if we need a state transition.
            if self.state == self.EATING_NORMAL_TEXT and "/" in next_line:
                next_line = self._enter_comment_if_started(next_line)
            # now handle things appropriately for our current state.
            if self.state == self.EATING_NORMAL_TEXT:
                self.normal_accumulator += next_line + "\n"
            elif self.state == self.EATING_ONELINE_COMMENT:
                # a one-line comment is finished as soon as its line is saved.
                self.comment_accumulator += next_line + "\n"
                self.emit_comment_accumulator()
                self.state = self.EATING_NORMAL_TEXT
            elif self.state == self.EATING_MULTILINE_COMMENT:
                self.comment_accumulator += next_line + "\n"
                # check for whether the multi-line comment is completed on this line.
                if "*/" in next_line:
                    self.emit_comment_accumulator()
                    self.state = self.EATING_NORMAL_TEXT
        # verify we're not in the wrong state still.
        if self.state == self.EATING_MULTILINE_COMMENT:
            print("file seems to have unclosed multi-line comment.")
            # keep the unterminated comment text rather than dropping it.
            self.emit_comment_accumulator()
        # last step is to spit out whatever was trailing in the accumulator.
        self.emit_normal_accumulator()
        # if we got to here, we seem to have happily consumed the file.
        return True

    def replace_all_occurrences(self):
        """Orchestrates the process of replacing the phrases.

        Returns False when the command line is unusable and True once every
        file has been attempted; a file that fails to read or process is
        skipped with a message.
        """
        # process our command line arguments to see what we need to do.
        try_command_line = self.validate_and_consume_command_line()
        if try_command_line != True:
            print("failed to process the command line...\n")
            self.print_instructions()
            return False
        # iterate through the list of files we were given and process them.
        for i, current_file in enumerate(self.files):
            print("file {0} is \'{1}\'".format(i, current_file))
            worked = self.read_file_data(current_file)
            if worked is False:
                print("skipping since file read failed on: {0}".format(current_file))
                continue
            worked = self.process_file_data()
            if worked is False:
                print("skipping, since processing failed on: {0}".format(current_file))
                continue
            worked = self.write_file_data(current_file)
            if worked is False:
                print("writing file back failed on: {0}".format(current_file))
        print("finished processing all files.")
        return True

    ##############

    def repair_project_references(self):
        """The main entry point to the project fixing process.

        Intended to operate on one project file at a time by:
        1) finding all libraries (lib files) used by the project A,
        2) locating the external project that creates each lib file,
        3) adding a reference to the external projects to our project A.

        We rely on some important assumptions to get this done:
        1) project names and project file names are unique across the codebase,
        2) the structure of the source code hierarchies uses a compatible architecture.

        Currently this only reports that it is unimplemented.
        """
        print("repair is unimplemented")

    ##############

    def extract_xml_tag(self, file_line, tag_name):
        """Locates an XML tag with "tag_name" and returns the contents of the tag.

        This assumes that the start tag, contents, and end tag are all on the
        same line of text (which is not a very good assumption in general).
        Returns "" when the tag is not completely present on the line.
        """
        tag_pos = file_line.find("<" + tag_name)
        if tag_pos < 0:
            return ""  # failed to find anything relevant.
        # find the greater-than that closes this start tag, not an earlier one.
        gt_pos = file_line.find(">", tag_pos)
        if gt_pos < 0:
            return ""  # fail.
        tag_close_pos = file_line.find("</" + tag_name, gt_pos + 1)
        if tag_close_pos < 0:
            return ""  # fail.
        return file_line[gt_pos + 1:tag_close_pos]

    def extract_xml_attribute(self, file_line, tag_name, attribute_name):
        """Locates an XML tag with "tag_name" and returns the value of the "attribute_name".

        The attribute is searched for from the start of the tag onwards, and
        its value is the text between the next pair of double quotes.  Returns
        "" when the tag, the attribute or the quotes are missing.
        """
        tag_pos = file_line.find("<" + tag_name)
        if tag_pos < 0:
            return ""
        attrib_pos = file_line.find(attribute_name, tag_pos)
        if attrib_pos < 0:
            return ""
        # find the first quote around the attribute.
        quote_pos = file_line.find('"', attrib_pos)
        if quote_pos < 0:
            return ""  # fail.
        quote_close_pos = file_line.find('"', quote_pos + 1)
        if quote_close_pos < 0:
            return ""  # fail.
        return file_line[quote_pos + 1:quote_close_pos]

    ##############

    def extract_xml_tag_from_file(self, filename, tag_name):
        """Reads in a file and extracts the contents of a particular XML tag.

        Returns the contents of the first line on which extract_xml_tag()
        finds something, or "" when no line matches or the file is unreadable.

        May not want a file read here; better to have a nice established way
        for dealing with the existing buffer.
        """
        self.read_file_data(filename)
        #hmmm: maybe bad assumption thinking all on one line...
        for next_line in self.file_lines:
            found = self.extract_xml_tag(next_line, tag_name)
            if found != "":
                return found
        return ""  # failed to find anything relevant.

    ##############

    def extract_guid_from_project_file(self, filename):
        """Reads in a visual studio project file and figures out that project's GUID.

        Returns the contents of the ProjectGuid tag, or "" if none is found.
        Note that this will fail if the project has been messed with and is no
        longer in microsoft's official format.
        """
        return self.extract_xml_tag_from_file(filename, "ProjectGuid")

    def extract_filename_from_project_reference(self, file_line):
        """Given a ProjectReference line, this pulls out just the filename involved."""
        return self.extract_xml_attribute(file_line, "ProjectReference", "Include")

    def find_all_project_references(self, filename):
        """Reads in a visual studio project file and locates all references.

        Returns the list of Include values of the ProjectReference lines.
        """
        self.read_file_data(filename)
        refs_list = []
        for next_line in self.file_lines:
            ref = self.extract_filename_from_project_reference(next_line)
            if ref != "":
                refs_list.append(ref)
        return refs_list

    def parse_dependency_line(self, line):
        """Given an AdditionalDependencies line, this finds the libs listed.

        Returns the semicolon separated entries as a list, leaving out empty
        entries and the '%(AdditionalDependencies)' placeholder.  Returns []
        when the line holds no complete AdditionalDependencies tag.
        """
        just_libs = self.extract_xml_tag(line, "AdditionalDependencies")
        if just_libs == "":
            return []
        # here we scan the list and remove any known-stupid entries.
        unwanted = ("", "%(AdditionalDependencies)")
        return [lib for lib in just_libs.split(";") if lib not in unwanted]

    ##############

    #hmmm: could be a problem if the debug and release values differ.
    def extract_dependencies(self, filename):
        """Reads in a visual studio project file and locates all dependencies.

        This will produce a list of the lib files used by c++ projects.  These
        are what we need to link up to their providing projects, if they're
        actually things that we build.  Only the first line that mentions
        AdditionalDependencies is used; [] is returned when there is none.
        """
        self.read_file_data(filename)
        for next_line in self.file_lines:
            if "AdditionalDependencies" in next_line:
                return self.parse_dependency_line(next_line)
        return []  # failed to find right line.

    ##############

    #hmmm: could also be a problem if the debug and release values differ.
    def find_asset_created(self, filename):
        """Determines the asset created by a visual studio project file.

        This probably only works right on c++ projects.  The name comes from
        the first RootNamespace tag and the kind from the first
        ConfigurationType (or OutputType) tag.  Returns the asset file name
        such as "core.lib", or "" when the project is a Utility, has an
        unknown type, or lacks either piece of information.
        """
        suffixes = {
            "DynamicLibrary": ".dll",
            "Library": ".dll",
            "StaticLibrary": ".lib",
            "Application": ".exe",
            "WinExe": ".exe",
            "Exe": ".exe",
        }
        # these will need to be filled for us to have correctly determined what the project creates.
        project_name = ""
        config_type = ""
        self.read_file_data(filename)
        for next_line in self.file_lines:
            # we need the ProjectName or RootNamespace.
            #hmmm: csproj seems like they will work with this scheme already.
            if project_name == "":
                temp = self.extract_xml_tag(next_line, "RootNamespace")
                if temp != "":
                    project_name = temp
            # we look for the ConfigurationType, which tells us:
            #   DynamicLibrary | Library | StaticLibrary | Application | WinExe | Exe | Utility
            if config_type == "":
                temp = self.extract_xml_tag(next_line, "ConfigurationType")
                if temp == "":
                    temp = self.extract_xml_tag(next_line, "OutputType")
                if temp != "":
                    config_type = temp
            if (config_type != "") and (project_name != ""):
                if config_type == "Utility":
                    return ""
                if config_type not in suffixes:
                    print("unknown configuration type: " + config_type + "\nin proj file: " + filename)
                    return ""
                # we think we're successful in figuring out what should be created.
                return project_name + suffixes[config_type]
        return ""  # failed to find right lines.

    ##############

    def walk_directory_for_projects(self, dir):
        """Traverses the directory in "dir" and finds all the project files.

        A project file is any file whose name ends in "proj", ignoring case.
        CVS and .svn directories are not visited.  The full paths are returned
        as one list; an empty or missing "dir" gives an empty list.
        """
        #hmmm: important optimization for walk; if the file where we store these things exists,
        #      read directly out of that file instead of redoing hierarchy.  people can
        #      delete the file when they want a fresh look.
        to_return = []
        if not dir:
            return to_return
        for root, dirs, files in os.walk(dir):
            # pruning "dirs" in place stops os.walk from descending into them.
            dirs[:] = [name for name in dirs if name not in ("CVS", ".svn")]
            for curr_file in files:
                # see if the file has the right ending to be a visual studio project file.
                if curr_file.lower().endswith("proj"):
                    # now add this good one to the returned list of projects.
                    to_return.append(os.path.join(root, curr_file))
        return to_return

    def find_relative_path(self, our_path, project_file):
        """Calculates path between directory at "our_path" to the location of "project_file".

        This assumes that the locations are actually rooted at the same place;
        otherwise there is no path between the locations.  The location at
        "our_path" is considered to be the source, or where we start out.  The
        location for "project_file" is the target location.  The result uses
        forward slashes and names the target's directory.
        """
        # canonicalize these to be linux paths.  we want to be able to split on forward slashes.
        # then fracture the paths into their directory and filename components.
        sourcel = our_path.replace("\\", "/").split("/")
        targee = project_file.replace("\\", "/").split("/")
        # remove last item, which should be project filename.
        # NOTE(review): the statement under this comment was missing from the
        # listing; dropping the target's file name is assumed -- confirm.
        targee.pop()
        # destroy identical elements until items don't match or one path is gone.
        while sourcel and targee and (sourcel[0] == targee[0]):
            sourcel.pop(0)
            targee.pop(0)
        # we compute the directory prefix of dot-dots based on the number of elements left
        # in the source path.
        prefix = "../" * len(sourcel)
        print("calculated a prefix of: " + prefix)
        #prove it is right in unit test.
        return prefix + "/".join(targee)

    ##############

    def locate_all_assets(self):
        """Locates every project file in our list and determines the asset created by it.

        This returns a dictionary of {asset=project} items.  We index by asset
        way more frequently than by project, so the asset name is used as our
        key.  Projects that create no recognizable asset are removed from
        self.projects.
        """
        to_return = {}
        useful_projects = []
        for proj in self.projects:
            asset_found = self.find_asset_created(proj)
            # make sure we don't record a bogus project with no useful asset.
            if asset_found == "":
                continue
            useful_projects.append(proj)
            to_return[asset_found] = proj
        # replace the contents afterwards; removing entries while looping over
        # the same list would make the loop skip the entry after each removal.
        self.projects[:] = useful_projects
        return to_return

    ##############

    def locate_referenced_projects(self, project):
        """Finds all the libraries needed by the "project" file and returns their project files.

        A library that is not a known asset under its own name is looked up
        once more with its ".lib" ending swapped for ".dll".  Libraries that
        no known project creates are ignored.
        """
        to_return = []
        # find the libraries and such used by this project.
        libs = self.extract_dependencies(project)
        # now find who creates those things.
        for current in libs:
            # if we know of the library in our list of assets, then we can use it right away.
            if current in self.assets:
                # this item exists and is created by one of our projects.
                to_return.append(self.assets[current])
                continue
            # if we didn't find the thing with its current name, we'll see if we can convert
            # it into a dll and look for that instead.
            if current.endswith(".lib"):
                as_dll = current[0:-4] + ".dll"
                if as_dll in self.assets:
                    to_return.append(self.assets[as_dll])
        return to_return

    def remove_redundant_references(self, project):
        """Cleans out any references in "project" to assets that we intend to update.

        This is meant to actually modify the file, so it had better be right.
        NOTE(review): only the plan below was visible in the listing; no
        statements were, so this remains a stub that changes nothing.
        """
        #load file data for the thing
        #see if reference is one we know about
        #if so, zap it out of file contents

    ##############

    def run_tests(self):
        """A sort-of unit test for the functions in this script.

        Currently geared for manual inspection of the test results.  The
        project file to examine is taken from the first command line argument,
        falling back to a file under FEISTY_MEOW_DIR.
        """
        print("testing some of the methods...")
        test_file = ""
        if len(self.arguments) > 1:
            test_file = self.arguments[1]
        if test_file == "":
            # an unset FEISTY_MEOW_DIR is treated as an empty prefix.
            test_file = os.getenv("FEISTY_MEOW_DIR", "") + "/nucleus/applications/nechung/nechung.vcxproj"
        print("test file is: " + test_file)

        guid = self.extract_guid_from_project_file(test_file)
        print("from proj, got a guid of " + guid)

        refs = self.find_all_project_references(test_file)
        print("refs list is: " + " ".join(refs))

        asset = self.find_asset_created(test_file)
        print("our created asset is: " + asset)

        # use this object's own project list rather than a module-level global.
        if len(self.projects) > 0:
            rando = random.randint(0, len(self.projects) - 1)
            print("index chosen to examine: {0}".format(rando))

            relpath = self.find_relative_path(os.path.dirname(test_file), self.projects[rando])
            print("found relative path from source:\n  " + test_file)
            print("to target:\n  " + self.projects[rando])
            print("is this =>\n  " + relpath)

        full_refs = self.locate_referenced_projects(test_file)
        print("refs found are:\n" + " ".join(full_refs))

        self.remove_redundant_references(test_file)
        print("we just munged your file!  go check it!  no references should remain that are in our new list.")

    # remaining steps for the repair process:
    #   remove any existing references that we now have a replacement for.
    #     base this on the basename of the project file?  blah.vcxproj already there e.g.
    #   spit out xml form of references for the dependent projects.
    #   put new references into the right place in file.
# run the script if we are non-interactive.
if __name__ == "__main__":
    import sys

    fixit = fix_project_references(sys.argv)

    # comment this out when script is working.
    fixit.run_tests()

    print("we're bailing before doing anything real...")
    # NOTE(review): the exit call was missing from the listing; the status
    # value used here is an assumption -- confirm.
    sys.exit(0)

    # not reached until the early exit above is removed.
    fixit.repair_project_references()
# parking lot of things to do in future: