6 class fix_project_references:
7 """ This tool fixes visual studio 2010 projects to have the proper project references.
9 Project files need to refer to other project files that they are dependent on if visual
10 studio is to build them properly. This is a painful task when done manually, but luckily
11 this script automates the process for you.
12 It requires an environment variable called BUILD_TOP that points at the top of all files
13 included in a code base. This is used to find the dependent projects.
16 hmmm: this tool is NOT finished.
def __init__(self, argv):
    """ Initializes the class with a set of arguments to work with.

    The arguments need to be in the form described by print_instructions().
    The BUILD_TOP environment variable names the top of the source code
    hierarchy; every project file found beneath it is recorded, together
    with the asset that each project creates.

    Args:
        argv: the command line as a list, with the script name first.
    """
    self.arguments = argv  # remembers the command line for us.
    self.file_buffer = ""  # use for file i/o in the class.
    # the top of the hierarchy is denoted by the BUILD_TOP environment variable.
    src_dir = os.getenv("BUILD_TOP")
    if not src_dir:
        # os.walk(None) would raise a TypeError that says nothing about the
        # real problem, so report the missing variable and index nothing.
        print("BUILD_TOP is not set; no project files can be located.")
        self.projects = []
    else:
        self.projects = self.walk_directory_for_projects(src_dir)  # list of project files.
    # dictionary of assets created by the project files, keyed by asset name.
    self.assets = self.locate_all_assets()
def print_instructions(self):
    """ Shows the instructions for using this class.

    The usage text covers the phrase replacement command line that
    validate_and_consume_command_line() parses.  The script name shown is
    taken from the first stored argument when there is one.
    """
    # an empty argument list must not turn a request for help into an IndexError.
    script_name = self.arguments[0] if self.arguments else "fix_project_references"
    # NOTE(review): the example lines are illustrative; they were chosen to
    # agree with the separation rule implemented by is_alphanumeric().
    print("""
This script will replace all occurrences of a phrase you specify in a set of files. The
replacement process will be careful about C and C++ syntax and will not replace occurrences
within comments or which are not "complete" phrases (due to other alpha-numeric characters
that abut the phrase). The arguments to the script are:

  {0}: PhraseToReplace ReplacementPhrase File1 [File2 ...]

For example, if the phrase to replace is Goop, it will be replaced in these contexts:
     Goop[32]
     molo-Goop
     *Goop
but it will not be found in these contexts:
     // doop de Goop
     rGoop
     Goop23
""".format(script_name))
def validate_and_consume_command_line(self):
    """ Performs command line argument handling.

    Expects the stored arguments to be: script name, phrase to replace,
    replacement phrase, and then one or more file names.

    Returns:
        True when the arguments were accepted (and stored on self as
        phrase_to_replace, replacement_bit and files), False when there are
        too few of them.
    """
    # we need more than 2 arguments after the script name, since there needs
    # to be at least one file also.
    if len(self.arguments) < 4:
        return False
    self.phrase_to_replace = self.arguments[1]
    self.replacement_bit = self.arguments[2]
    print("got phrase to replace: \'{0}\' and replacement: \'{1}\'".format(self.phrase_to_replace, self.replacement_bit))
    self.files = self.arguments[3:]
    return True
75 #hmmm: are these good enough to become part of a file library?
def read_file_data(self, filename):
    """ loads the file into our memory buffer for processing.

    The contents are stored as text in self.file_buffer and, split into
    lines without their line endings, in self.file_lines.  Both are emptied
    first, so a failed read never leaves a previous file's contents behind
    for callers that do not check the result.

    Args:
        filename: path of the file to load.

    Returns:
        True if the file was read, False if it could not be opened or read.
    """
    self.file_buffer = ""
    self.file_lines = []
    try:
        # the rest of the class searches the lines with str operations, so
        # the data must be text rather than bytes.  surrogateescape keeps any
        # bytes that are not valid utf-8 intact so they can be written back.
        our_file = open(filename, "r", encoding="utf-8", errors="surrogateescape", newline="")
    except OSError:
        print("There was an error opening the file {0}".format(filename))
        return False
    with our_file:
        try:
            self.file_buffer = our_file.read()
        except OSError:
            print("There was an error reading the file {0}".format(filename))
            return False
    self.file_lines = self.file_buffer.splitlines()
    return True
def write_file_data(self, filename):
    """ takes the processed buffer and sends it back out to the filename.

    self.processed_buffer may be text or bytes; text is encoded as utf-8.
    self.file_buffer is left alone (it used to be overwritten with the
    result of the write call).

    Args:
        filename: path of the file to overwrite.

    Returns:
        True if the data was written, False if the file could not be opened
        or written.
    """
    # output_filename = filename + ".new"  # safe testing version.
    output_filename = filename
    payload = self.processed_buffer
    if isinstance(payload, str):
        # surrogateescape restores any undecodable bytes that were carried
        # through a text-mode read using the same error handler.
        payload = payload.encode("utf-8", errors="surrogateescape")
    try:
        our_file = open(output_filename, "wb")
    except OSError:
        print("There was an error opening the file {0}".format(output_filename))
        return False
    with our_file:
        try:
            our_file.write(payload)
        except OSError:
            print("There was an error writing the file {0}".format(output_filename))
            return False
    return True
def is_alphanumeric(self, check_char):
    """ given a character, this returns true if it can be part of an identifier.

    Only the first character of "check_char" is examined.  It counts when it
    is an underscore or lies between a-z, A-Z or 0-9; other characters,
    including accented letters, do not count.

    Returns:
        True for an identifier character, False otherwise (including when
        "check_char" is empty).
    """
    if not check_char:
        return False  # nothing to examine; indexing would raise.
    first = check_char[0]
    return (first == "_"
            or "a" <= first <= "z"
            or "A" <= first <= "Z"
            or "0" <= first <= "9")
def replace_within_string(self, fix_string):
    """ given a string to fix, this replaces all appropriate locations of the phrase.

    An occurrence of self.phrase_to_replace is swapped for
    self.replacement_bit only when the characters on either side of it are
    not identifier characters (see is_alphanumeric); the start and end of
    the string count as separators.

    Args:
        fix_string: the text to process.

    Returns:
        the processed form of the text.
    """
    phrase = self.phrase_to_replace
    replacement = self.replacement_bit
    if not phrase:
        return fix_string  # an empty phrase matches everywhere and would never finish.
    position = 0
    while position < len(fix_string):
        # locate next occurrence of the phrase, if any.
        position = fix_string.find(phrase, position)
        if position < 0:
            break
        phrase_end = position + len(phrase)
        # "?" is a simple default that won't fail our check.
        char_before = fix_string[position - 1] if position > 0 else "?"
        # any character after the phrase must be inspected, including the very
        # last one in the string.
        char_after = fix_string[phrase_end] if phrase_end < len(fix_string) else "?"
        if not self.is_alphanumeric(char_before) and not self.is_alphanumeric(char_after):
            # this looks like a good candidate for replacement.
            fix_string = fix_string[:position] + replacement + fix_string[phrase_end:]
            # resume after the inserted text; rescanning it would loop forever
            # when the replacement itself contains the phrase.
            position += len(replacement)
        else:
            position += 1  # no match here means we have to keep skipping forward.
    return fix_string  # give back processed form.
def process_file_data(self):
    """ iterates through the stored version of the file and replaces the phrase.

    Walks self.file_lines with a small state machine that separates normal
    text from "//" and "/* */" comments.  Normal text is gathered in
    self.normal_accumulator and comment text in self.comment_accumulator;
    each is handed to emit_normal_accumulator() / emit_comment_accumulator()
    whenever the state changes.

    Known limitations: a slash pair inside a string literal is treated as a
    comment start, and text following "*/" on the same line stays with the
    comment.

    Returns:
        True once the whole file has been consumed.
    """
    self.state = self.EATING_NORMAL_TEXT
    # clear out any previously processed text.
    self.processed_buffer = ""  # reset our new version of the file contents.
    self.normal_accumulator = ""
    self.comment_accumulator = ""
    # iterate through the file's lines.
    for next_line in self.file_lines:
        # where a "*/" terminator may begin on this line.
        terminator_search_start = 0
        # decide if we need a state transition.
        if self.state == self.EATING_NORMAL_TEXT:
            # loop to catch cases where multiple slashes are in line and one IS a comment.
            slash_at = next_line.find('/')
            while slash_at >= 0:
                follower = next_line[slash_at + 1 : slash_at + 2]
                if follower == '/':
                    entered_state = self.EATING_ONELINE_COMMENT
                elif follower == '*':
                    entered_state = self.EATING_MULTILINE_COMMENT
                else:
                    # a lone slash; keep skipping forward.
                    slash_at = next_line.find('/', slash_at + 1)
                    continue
                # switch states and handle any pent-up text.
                self.normal_accumulator += next_line[:slash_at]  # last tidbit before comment start.
                next_line = next_line[slash_at:]  # keep only the stuff starting at slash.
                self.state = entered_state
                self.emit_normal_accumulator()
                # the opener occupies the first two characters, so "/*/" must
                # not be mistaken for an opener followed by a terminator.
                terminator_search_start = 2
                break
        # now handle things appropriately for our current state.
        if self.state == self.EATING_NORMAL_TEXT:
            # add the text to the normal accumulator.
            self.normal_accumulator += next_line + "\n"
        elif self.state == self.EATING_ONELINE_COMMENT:
            # save the text in comment accumulator; the comment ends with the line.
            self.comment_accumulator += next_line + "\n"
            self.emit_comment_accumulator()
            self.state = self.EATING_NORMAL_TEXT
        elif self.state == self.EATING_MULTILINE_COMMENT:
            # save the text in comment accumulator.
            self.comment_accumulator += next_line + "\n"
            # check for whether the multi-line comment is completed on this line.
            if next_line.find("*/", terminator_search_start) >= 0:
                self.emit_comment_accumulator()
                self.state = self.EATING_NORMAL_TEXT
    # verify we're not in the wrong state still.
    if self.state == self.EATING_MULTILINE_COMMENT:
        print("file seems to have unclosed multi-line comment.")
        # hand over the pending comment text as well, so that it is not lost.
        # NOTE(review): presumes emit_comment_accumulator() appends the
        # accumulator to the processed buffer -- confirm against its definition.
        self.emit_comment_accumulator()
    # last step is to spit out whatever was trailing in the accumulator.
    self.emit_normal_accumulator()
    # if we got to here, we seem to have happily consumed the file.
    return True
def replace_all_occurrences(self):
    """ Orchestrates the process of replacing the phrases.

    Parses the command line and then reads, processes and rewrites each file
    named on it.  A file whose read or processing step fails is skipped and
    the remaining files are still handled.

    Returns:
        False when the command line could not be processed (the usage text
        is printed), True after all files have been visited.
    """
    # process our command line arguments to see what we need to do.
    if not self.validate_and_consume_command_line():
        print("failed to process the command line...\n")
        self.print_instructions()
        return False
    # iterate through the list of files we were given and process them.
    for index, filename in enumerate(self.files):
        print("file {0} is \'{1}\'".format(index, filename))
        if self.read_file_data(filename) is False:
            print("skipping since file read failed on: {0}".format(filename))
            continue
        if self.process_file_data() is False:
            print("skipping, since processing failed on: {0}".format(filename))
            continue
        if self.write_file_data(filename) is False:
            print("writing file back failed on: {0}".format(filename))
    print("finished processing all files.")
    return True
def repair_project_references(self):
    """ the main entry point to the project fixing process.

    The intended design operates on one project file at a time by:
    1) finding all libraries (lib files) used by the project A,
    2) locating the external project that creates each lib file,
    3) adding a reference to the external projects to our project A.

    We rely on some important assumptions to get this done:
    1) project names and project file names are unique across the codebase,
    2) the structure of the source code hierarchies uses a compatible architecture.

    None of these steps is carried out yet; the method only prints a notice
    saying that the repair is unimplemented.
    """
    print("repair is unimplemented")
def extract_xml_tag(self, file_line, tag_name):
    """ locates an XML tag with "tag_name" and returns the contents of the tag.

    this currently assumes that the start tag, contents, and end tag are all on the same
    line of text (which is not a very good assumption in general).

    The tag name must match completely: looking for "Project" does not pick
    up "<ProjectGuid>".  The contents start after the '>' that closes the
    matched start tag, so other tags earlier on the line do not interfere.

    Args:
        file_line: one line of text from an XML file.
        tag_name: name of the element to find, without angle brackets.

    Returns:
        the text between the start and end tags, or "" if the tag is not
        found complete on this line.
    """
    def find_whole_name(prefix, start):
        """ finds "prefix" followed by '>' or whitespace; returns (index, index past name) or (-1, -1). """
        while True:
            found = file_line.find(prefix, start)
            if found < 0:
                return -1, -1
            name_end = found + len(prefix)
            boundary = file_line[name_end : name_end + 1]
            if boundary == '>' or boundary.isspace():
                return found, name_end
            start = found + 1  # only a longer name sharing our prefix; keep looking.

    open_pos, open_name_end = find_whole_name('<' + tag_name, 0)
    if open_pos < 0:
        return ""  # failed to find anything relevant.
    # find the greater than character that ends the start tag.
    gt_pos = file_line.find('>', open_name_end)
    if gt_pos < 0:
        return ""  # fail.
    tag_close_pos, _ = find_whole_name('</' + tag_name, gt_pos + 1)
    if tag_close_pos < 0:
        return ""  # fail.
    return file_line[gt_pos + 1 : tag_close_pos]
def extract_xml_attribute(self, file_line, tag_name, attribute_name):
    """ locates an XML tag with "tag_name" and returns the value of the "attribute_name".

    The attribute has to appear inside the start tag of "tag_name" as a whole
    name (not as part of a longer attribute name, and not elsewhere on the
    line) with a double-quoted value.  Like extract_xml_tag, this only looks
    at a single line of text.

    Args:
        file_line: one line of text from an XML file.
        tag_name: name of the element that carries the attribute.
        attribute_name: name of the attribute to read.

    Returns:
        the attribute's value without the quotes, or "" if it was not found.
    """
    import re  # local import; nothing else in this method's surroundings needs it.

    # "<tag" as a complete name, then anything within the same start tag, then
    # the attribute preceded by whitespace and followed by ="value".
    pattern = r'<{0}(?=[\s/>])[^>]*?\s{1}\s*=\s*"([^"]*)"'.format(
        re.escape(tag_name), re.escape(attribute_name))
    match = re.search(pattern, file_line)
    if match is None:
        return ""  # fail.
    return match.group(1)
def extract_xml_tag_from_file(self, filename, tag_name):
    """ reads in a file and extracts the contents of a particular XML tag.

    may not want a file read here.  better to have a nice established way for
    dealing with the existing buffer.

    Args:
        filename: path of the XML file to load (replaces the current buffer).
        tag_name: name of the element to look for.

    Returns:
        the contents of the first line-contained occurrence of the tag, or ""
        if the file could not be read or holds no such tag.
    """
    if self.read_file_data(filename) is False:
        return ""  # without this, the lines of a previously read file would be searched.
    for next_line in self.file_lines:
        #hmmm: maybe bad assumption thinking all on one line...
        found = self.extract_xml_tag(next_line, tag_name)
        if found != "":
            return found
    return ""  # failed to find anything relevant.
def extract_guid_from_project_file(self, filename):
    """ reads in a visual studio project file and figures out that project's GUID.

    note that this will fail horribly if the project has been messed with and is no
    longer in microsoft's official format.

    Returns whatever extract_xml_tag_from_file() yields for the 'ProjectGuid'
    tag of "filename".
    """
    return self.extract_xml_tag_from_file(filename, 'ProjectGuid')
def extract_filename_from_project_reference(self, file_line):
    """ given a ProjectReference line, this pulls out just the filename involved.

    Returns whatever extract_xml_attribute() yields for the "Include"
    attribute of a "ProjectReference" tag on "file_line".
    """
    return self.extract_xml_attribute(file_line, "ProjectReference", "Include")
def find_all_project_references(self, filename):
    """ reads in a visual studio project file and locates all references.

    Args:
        filename: path of the project file to load (replaces the current buffer).

    Returns:
        a list with the file named by each ProjectReference line, in file
        order; empty if there are none or the file could not be read.
    """
    if self.read_file_data(filename) is False:
        return []  # don't report the references of a previously read file.
    candidates = (self.extract_filename_from_project_reference(line)
                  for line in self.file_lines)
    # lines that are not project references come back as "".
    return [ref for ref in candidates if ref != ""]
def parse_dependency_line(self, line):
    """ given an AdditionalDependencies line, this finds the libs listed.

    Args:
        line: a line of project file text.

    Returns:
        a list of the semicolon-separated entries inside the
        AdditionalDependencies tag, in order; empty if the tag is absent or
        lists nothing useful.
    """
    just_libs = self.extract_xml_tag(line, "AdditionalDependencies")
    if just_libs == "":
        return []
    # here we drop the known-stupid entries: the inherit-from-parent macro and
    # empty items left by doubled or trailing semicolons.  every position is
    # checked, including the first.
    unwanted = ('', '%(AdditionalDependencies)')
    return [lib for lib in just_libs.split(';') if lib not in unwanted]
366 #hmmm: could be a problem if the debug and release values differ.
def extract_dependencies(self, filename):
    """ reads in a visual studio project file and locates all dependencies.

    This will produce a list of the lib files used by c++ projects.  These
    are what we need to link up to their providing projects, if they're
    actually things that we build.

    Only the first line mentioning AdditionalDependencies is used, so
    differing per-configuration values are not merged.

    Args:
        filename: path of the project file to load (replaces the current buffer).

    Returns:
        the list from parse_dependency_line() for the first matching line, or
        an empty tuple if the file could not be read or has no such line.
    """
    if self.read_file_data(filename) is False:
        return ()  # don't report the dependencies of a previously read file.
    for next_line in self.file_lines:
        if 'AdditionalDependencies' in next_line:
            return self.parse_dependency_line(next_line)
    return ()  # failed to find right line.
388 #hmmm: could also be a problem if the debug and release values differ.
def find_asset_created(self, filename):
    """ determines the asset created by a visual studio project file.

    This probably only works right on c++ projects.  It will figure out the
    item being created by the project using the breadcrumbs provided: the
    RootNamespace gives the base name and the ConfigurationType (or
    OutputType) decides the file extension.

    Args:
        filename: path of the project file to load (replaces the current buffer).

    Returns:
        the asset's file name such as "basis.dll", or "" when the file could
        not be read, the needed tags were not found, or the project is a
        Utility or of an unknown type.
    """
    if self.read_file_data(filename) is False:
        return ""  # don't judge this project by a previously read file.

    # these will need to be filled for us to have correctly determined what the project creates.
    project_name = ""
    config_type = ""
    for next_line in self.file_lines:
        if project_name == "":
            # the base name of the asset comes from the RootNamespace.
            project_name = self.extract_xml_tag(next_line, "RootNamespace")
        if config_type == "":
            # we look for the ConfigurationType (or OutputType), which tells us:
            #   DynamicLibrary | Library | StaticLibrary | Application | WinExe | Exe | Utility
            config_type = (self.extract_xml_tag(next_line, "ConfigurationType")
                           or self.extract_xml_tag(next_line, "OutputType"))
        if project_name != "" and config_type != "":
            break  # both breadcrumbs found, even if that happened on the final line.
    if project_name == "" or config_type == "":
        return ""  # failed to find right lines.

    if config_type == "Utility":
        return ""  # utility projects produce no asset for others to reference.
    extensions = {
        "DynamicLibrary": ".dll",
        "Library": ".dll",
        "StaticLibrary": ".lib",
        "Application": ".exe",
        "WinExe": ".exe",
        "Exe": ".exe",
    }
    extension = extensions.get(config_type)
    if extension is None:
        print("unknown configuration type: " + config_type + "\nin proj file: " + filename)
        return ""
    # we think we're successful in figuring out what should be created.
    return project_name + extension
def walk_directory_for_projects(self, dir):
    """ traverses the directory in "dir" and finds all the project files.

    the project files are returned as a massive list.  A project file is any
    file whose name ends in "proj", ignoring case (.vcxproj, .csproj, ...).
    Version control bookkeeping directories are not visited.

    Args:
        dir: top directory of the hierarchy to search; may be None or "".

    Returns:
        a list of full paths of the project files found; empty when "dir" is
        not given.
    """
    #hmmm: important optimization for walk; if the file where we store these things exists,
    #      read directly out of that file instead of redoing hierarchy.  people can
    #      delete the file when they want a fresh look.
    if not dir:
        return []  # os.walk(None) raises instead of finding nothing.
    # don't visit CVS or subversion directories (or git's, for the same reason).
    skipped_dirs = ('CVS', '.svn', '.git')
    to_return = []
    for root, dirs, files in os.walk(dir):
        # pruning in place is what stops os.walk from descending into them.
        dirs[:] = [name for name in dirs if name not in skipped_dirs]
        for curr_file in files:
            # see if the file has the right ending to be a visual studio project file.
            if curr_file.lower().endswith('proj'):
                # now add this good one to the returned list of projects.
                to_return.append(os.path.join(root, curr_file))
    return to_return
def find_relative_path(self, our_path, project_file):
    """ calculates path between directory at "our_path" to the location of "project_file".

    this assumes that the locations are actually rooted at the same place; otherwise there is
    no path between the locations.  the location at "our_path" is considered to be the source,
    or where we start out.  the location for "project_file" is the target location.

    Args:
        our_path: directory we start from; either slash style is accepted.
        project_file: path of the target project file.

    Returns:
        a forward-slash relative path from "our_path" to the directory that
        holds "project_file" ("" when they are the same directory).
    """
    # canonicalize these to be linux paths.  we want to be able to split on forward slashes.
    # a trailing slash on the source must not count as an extra directory level.
    source_parts = our_path.replace('\\', '/').rstrip('/').split('/')
    target_parts = project_file.replace('\\', '/').split('/')
    # remove last item, which should be project filename.
    # NOTE(review): this assumes the result is meant to name the target's
    # directory rather than the project file itself -- confirm with callers.
    target_parts.pop()
    # skip identical leading elements until items don't match or one path is gone.
    shared = 0
    for source_item, target_item in zip(source_parts, target_parts):
        if source_item != target_item:
            break
        shared += 1
    # we compute the directory prefix of dot-dots based on the number of elements left
    # in the source path.
    prefix = "../" * (len(source_parts) - shared)
    print("calculated a prefix of: " + prefix)
    return prefix + "/".join(target_parts[shared:])
def locate_all_assets(self):
    """ locates every project file in our list and determines the asset created by it.

    this returns a dictionary of {asset=project} items.  we index by asset way more frequently
    than by project, so the asset name is used as our key.

    Projects for which no asset can be determined are also dropped from
    self.projects.  When two projects create the same asset, the later one
    in the list is recorded.
    """
    to_return = {}
    useful_projects = []
    for proj in self.projects:
        asset_found = self.find_asset_created(proj)
        # make sure we don't record a bogus project with no useful asset.
        if asset_found == "":
            continue  # skip adding the bogus one.
        useful_projects.append(proj)
        to_return[asset_found] = proj
    # the survivors are swapped in only after the loop: removing entries from
    # the list while iterating over it makes the loop skip the next project.
    self.projects[:] = useful_projects
    return to_return
def locate_referenced_projects(self, project):
    """ finds all the libraries needed by the "project" file and returns their project files.

    Each library named by extract_dependencies() is looked up in self.assets.
    A ".lib" that is not known under its own name is retried as a ".dll",
    since a dll project is linked through its import library.

    Args:
        project: path of the project file whose dependencies are wanted.

    Returns:
        a list of project files, one per dependency that we know how to
        create, in dependency order.  Unknown libraries are left out.
    """
    to_return = []
    # find the libraries and such used by this project, then find who creates those things.
    for current in self.extract_dependencies(project):
        # if we know of the library in our list of assets, then we can use it right away.
        proj_needed = self.assets.get(current)
        # if we didn't find the thing with its current name, we'll see if we can convert
        # it into a dll and look for that instead.  file names are not case
        # sensitive on windows, so "FOO.LIB" counts as a lib too.
        if proj_needed is None and current.lower().endswith(".lib"):
            proj_needed = self.assets.get(current[:-4] + ".dll")
        if proj_needed is not None:
            to_return.append(proj_needed)
    return to_return
def remove_redundant_references(self, project):
    """ cleans out any references in "project" to assets that we intend to update.

    this is meant to actually modify the file, so it had better be right.
    NOTE(review): only the plan below is visible in this method; no
    statements that carry it out appear here -- confirm before relying on it.
    """
    # planned steps:
    #load file data for the thing
    #see if reference is one we know about
    #if so, zap it out of file contents
579 """ a sort-of unit test for the functions in this script.
581 currently geared for manual inspection of the test results.
583 print("testing some of the methods...")
585 if len(self.arguments) > 1:
586 test_file = self.arguments[1]
587 if test_file == "": test_file = os.getenv("FEISTY_MEOW_APEX") + "/nucleus/applications/nechung/nechung.vcxproj"
588 print("test file is: " + test_file)
590 guid = self.extract_guid_from_project_file(test_file)
591 print("from proj, got a guid of " + guid)
593 refs = self.find_all_project_references(test_file)
594 print("refs list is: " + " ".join(refs))
596 # libs = self.extract_dependencies(test_file)
597 # print("was told the libs used are: " + " ".join(libs))
599 asset = self.find_asset_created(test_file)
600 print("our created asset is: " + asset)
602 # print("walked directories got:\n" + " ".join(fixit.projects))
604 # print("assets found are:\n" + " ".join(fixit.assets))
606 if (len(fixit.projects) > 0):
607 rando = random.randint(0, len(fixit.projects) - 1)
608 print("index chosen to examine: {0}".format(rando))
610 relpath = self.find_relative_path(os.path.dirname(test_file), fixit.projects[rando])
611 print("found relative path from source:\n " + test_file)
612 print("to target:\n " + fixit.projects[rando])
613 print("is this =>\n " + relpath)
615 full_refs = self.locate_referenced_projects(test_file)
616 print("refs found are:\n" + " ".join(full_refs))
618 self.remove_redundant_references(test_file)
619 print("we just munged your file! go check it! no references should remain that are in our new list.")
622 # remove any existing references that we now have a replacement for.
623 # base this on the basename of the project file? blah.vcxproj already there e.g.
624 # spit out xml form of references for the dependent projects.
625 # put new references into the right place in file.
# run the script if we are non-interactive.
if __name__ == "__main__":
    # build the fixer from the raw command line; its constructor indexes the
    # project files under BUILD_TOP.
    fixit = fix_project_references(sys.argv)

    # comment this out when script is working.
    # NOTE(review): the statement(s) that comment refers to are not visible
    # here beyond the notice printed below -- check the surrounding file.
    print("we're bailing before doing anything real...")

    # the real work of the tool (reports that it is unimplemented for now).
    fixit.repair_project_references()
645 # parking lot of things to do in future: