1 #!/usr/bin/python
3 import os;
4 import random;
"""
This tool fixes Visual Studio 2010 projects so that they have the proper project references.

Project files need to refer to the other project files that they depend on if Visual
Studio is to build them properly.  This is a painful task when done manually, but luckily
this script automates the process for you.

It requires an environment variable called BUILD_TOP that points at the top of all files
included in a code base.  This is used to find the dependent projects.

NOTE: this tool is NOT finished.
"""
def __init__(self, argv):
    """ Initializes the class with a set of arguments to work with.

    The arguments need to be in the form described by print_instructions().

    The top of the source code hierarchy is taken from the BUILD_TOP
    environment variable; every project file found below it is recorded,
    along with the asset each project is believed to create.
    """
    self.argumentsarguments = argv  # remembers the command line for us.
    self.file_bufferfile_buffer = ""  # use for file i/o in the class.
    # initializes the list of projects found for the current source code hierarchy.
    src_dir = os.getenv("BUILD_TOP")
    # projects and assets should keep in step, where project[x] is known to create asset[x].
    if not src_dir:
        # passing None on to the directory walk would fail, so report the
        # missing setting and carry on with an empty project list instead.
        print("BUILD_TOP is not set; no projects can be located.")
        self.projectsprojects = []
    else:
        # NOTE(review): the def line of walk_directory_for_projects is not
        # visible in this listing; the name is taken from this call site.
        self.projectsprojects = self.walk_directory_for_projects(src_dir)  # list of project files.
    self.assetsassets = self.locate_all_assets()  # dictionary of assets created by project files.
37 #fix
def print_instructions(self):
    """ Shows the instructions for using this class.

    The program name in the usage line comes from the first stored command
    line argument.
    """
    # NOTE(review): this help text describes a phrase-replacement tool, not a
    # project reference fixer (the file marks it "#fix"); the wording is left
    # untouched here because the intended usage is not stated anywhere visible.
    print("""
This script will replace all occurrences of a phrase you specify in a set of files. The
replacement process will be careful about C and C++ syntax and will not replace occurrences
within comments or which are not "complete" phrases (due to other alpha-numeric characters
that abut the phrase). The arguments to the script are:

  {0}: PhraseToReplace ReplacementPhrase File1 [File2 ...]

For example, if the phrase to replace is Goop, it will be replaced in these contexts:
  Goop[32]
  molo-Goop
  *Goop
but it will not be found in these contexts:
  // doop de Goop
  rGoop
  Goop23
""".format(self.argumentsarguments[0]))
58  #fix
def validate_and_consume_command_line(self):
    """ Performs command line argument handling.

    Stores the phrase to replace, the replacement and the list of files taken
    from the remembered command line.  Returns True when enough arguments
    were supplied and False otherwise (nothing is stored in that case).
    """
    # NOTE(review): the def line was missing from this listing; the name is
    # taken from the call made by the replacement driver code.
    arg_count = len(self.argumentsarguments)
    # we need the program name, both phrases and at least one file.
    if arg_count < 4:
        return False
    self.phrase_to_replacephrase_to_replace = self.argumentsarguments[1]
    self.replacement_bitreplacement_bit = self.argumentsarguments[2]
    print("got phrase to replace: \'{0}\' and replacement: \'{1}\'".format(
        self.phrase_to_replacephrase_to_replace, self.replacement_bitreplacement_bit))
    self.filesfiles = self.argumentsarguments[3:]
    return True
75 #hmmm: are these good enough to become part of a file library?
def read_file_data(self, filename):
    """ loads the file into our memory buffer for processing.

    The file is read as bytes and decoded as UTF-8 so that the buffer and the
    list of lines are text, which is what the line-scanning code in this file
    compares against.  Returns True on success; on any failure a message is
    printed, the stored lines are left empty and False is returned.
    """
    # forget the previous file first, so a failed read can never leave the
    # lines of an earlier file behind for a caller that ignores the result.
    self.file_bufferfile_buffer = ""
    self.file_linesfile_lines = []
    try:
        our_file = open(filename, "rb")
    except IOError:
        print("There was an error opening the file {0}".format(filename))
        return False
    try:
        raw_bytes = our_file.read()
    except IOError:
        print("There was an error reading the file {0}".format(filename))
        return False
    finally:
        our_file.close()
    try:
        self.file_bufferfile_buffer = raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        # not text we can scan safely; treat it like any other read failure.
        print("There was an error reading the file {0}".format(filename))
        return False
    self.file_linesfile_lines = self.file_bufferfile_buffer.splitlines()
    return True
def write_file_data(self, filename):
    """ takes the processed buffer and sends it back out to the filename.

    The file is overwritten in place.  Text is encoded as UTF-8 before being
    written, since the file is opened in binary mode.  Returns True on
    success; prints a message and returns False when the file cannot be
    opened or written.
    """
    # output_filename = filename + ".new"  # safe testing version.
    output_filename = filename
    payload = self.processed_bufferprocessed_buffer
    if not isinstance(payload, bytes):
        payload = payload.encode("utf-8")
    try:
        our_file = open(output_filename, "wb")
    except IOError:
        print("There was an error opening the file {0}".format(output_filename))
        return False
    try:
        # the result of write() is deliberately not stored; keeping it used
        # to clobber the read buffer with a byte count.
        our_file.write(payload)
    except IOError:
        print("There was an error writing the file {0}".format(output_filename))
        return False
    finally:
        our_file.close()
    return True
114  #unused?
def is_alphanumeric(self, check_char):
    """ given a character, this returns true if it's a-z, A-Z, 0-9 or an underscore.

    Only the first character of "check_char" is examined.  An empty string is
    reported as not alphanumeric rather than raising an error.
    """
    if not check_char:
        return False
    first = check_char[0]
    if first == "_":
        return True
    # plain ASCII range checks, so that accented letters do not count.
    return ("a" <= first <= "z") or ("A" <= first <= "Z") or ("0" <= first <= "9")
127  #unused?
def replace_within_string(self, fix_string):
    """ given a string to fix, this replaces all appropriate locations of the phrase.

    An occurrence is only replaced when the characters directly before and
    after it are not alphanumeric (as judged by is_alphanumeric), so that the
    phrase is never replaced inside a longer identifier.  Returns the
    processed string.
    """
    phrase = self.phrase_to_replacephrase_to_replace
    replacement = self.replacement_bitreplacement_bit
    if not phrase:
        return fix_string  # an empty phrase matches everywhere; nothing sensible to do.
    indy = 0
    while indy < len(fix_string):
        # locate next occurrence of replacement text, if any.
        indy = fix_string.find(phrase, indy)
        if indy < 0:
            break
        end = indy + len(phrase)
        # "?" is a simple default that won't fail our check at the string edges.
        char_before = fix_string[indy - 1] if indy > 0 else "?"
        # a following character exists whenever the match stops short of the
        # end of the string, including when it is the very last character.
        char_after = fix_string[end] if end < len(fix_string) else "?"
        if not self.is_alphanumeric(char_before) and not self.is_alphanumeric(char_after):
            # this looks like a good candidate for replacement.
            fix_string = fix_string[:indy] + replacement + fix_string[end:]
            # resume after the inserted text, so a replacement that contains
            # the phrase is never matched again.
            indy += len(replacement)
        else:
            indy += 1  # no match here means we have to keep skipping forward.
    return fix_string  # give back processed form.
155  #unused?
def process_file_data(self):
    """ iterates through the stored version of the file and replaces the phrase.

    Each stored line is sorted into "normal" text or comment text.  Text in
    front of a // or /* marker is flushed through emit_normal_accumulator();
    comment text is flushed through emit_comment_accumulator().  Always
    returns True; an unclosed multi-line comment only produces a warning.

    NOTE(review): the EATING_* state constants and the two emit_* methods are
    not defined in the visible part of this file; they are assumed to be
    provided by the class.
    """
    self.statestate = self.EATING_NORMAL_TEXT
    # clear out any previously processed text.
    self.processed_bufferprocessed_buffer = ""  # reset our new version of the file contents.
    self.normal_accumulatornormal_accumulator = ""
    self.comment_accumulatorcomment_accumulator = ""
    # iterate through the file's lines.
    for next_line in self.file_linesfile_lines:
        # decide if we need a state transition.
        if (self.statestate == self.EATING_NORMAL_TEXT) and ('/' in next_line):
            # loop to catch cases where multiple slashes are in line and one IS a comment.
            indy = 0
            while indy < len(next_line):
                # locate next slash, if any.
                indy = next_line.find('/', indy)
                if indy < 0:
                    break
                follower = next_line[indy + 1:indy + 2]  # empty when the slash ends the line.
                if follower == '/':
                    new_state = self.EATING_ONELINE_COMMENT
                elif follower == '*':
                    new_state = self.EATING_MULTILINE_COMMENT
                else:
                    indy += 1  # a lone slash; keep skipping forward.
                    continue
                # switch states and handle any pent-up text.
                self.normal_accumulatornormal_accumulator += next_line[0:indy]  # last tidbit before comment start.
                next_line = next_line[indy:]  # keep only the stuff starting at slash.
                self.statestate = new_state
                self.emit_normal_accumulator()
                break

        # now handle things appropriately for our current state.
        if self.statestate == self.EATING_NORMAL_TEXT:
            # add the text to the normal accumulator.
            self.normal_accumulatornormal_accumulator += next_line + "\n"
        elif self.statestate == self.EATING_ONELINE_COMMENT:
            # a one-line comment always ends with its line.
            self.comment_accumulatorcomment_accumulator += next_line + "\n"
            self.emit_comment_accumulator()
            self.statestate = self.EATING_NORMAL_TEXT
        elif self.statestate == self.EATING_MULTILINE_COMMENT:
            # save the text in comment accumulator.
            self.comment_accumulatorcomment_accumulator += next_line + "\n"
            # check for whether the multi-line comment is completed on this line.
            if "*/" in next_line:
                self.emit_comment_accumulator()
                self.statestate = self.EATING_NORMAL_TEXT
    # verify we're not in the wrong state still.
    if self.statestate == self.EATING_MULTILINE_COMMENT:
        print("file seems to have unclosed multi-line comment.")
    # last step is to spit out whatever was trailing in the accumulator.
    self.emit_normal_accumulator()
    # if we got to here, we seem to have happily consumed the file.
    return True
226  #use for example.
228  """ Orchestrates the process of replacing the phrases. """
229  # process our command line arguments to see what we need to do.
230  try_command_line = self.validate_and_consume_command_linevalidate_and_consume_command_line()
231  if (try_command_line != True):
232  print("failed to process the command line...\n")
233  self.print_instructionsprint_instructions()
234  exit(1)
235  # iterate through the list of files we were given and process them.
236  for i in range(0, len(self.filesfiles)):
237  print("file {0} is \'{1}\'".format(i, self.filesfiles[i]))
238  worked = self.read_file_dataread_file_data(self.filesfiles[i])
239  if (worked is False):
240  print("skipping since file read failed on: {0}".format(self.filesfiles[i]))
241  continue
242 # print("{0} got file contents:\n{1}".format(self.files[i], self.file_lines))
243  worked = self.process_file_dataprocess_file_data()
244  if (worked is False):
245  print("skipping, since processing failed on: {0}".format(self.filesfiles[i]))
246  continue
247  worked = self.write_file_datawrite_file_data(self.filesfiles[i])
248  if (worked is False):
249  print("writing file back failed on: {0}".format(self.filesfiles[i]))
250  print("finished processing all files.")
def repair_project_references(self):
    """ the main entry point to the project fixing process.

    Operates on one project file at a time by:
    1) finding all libraries (lib files) used by the project A,
    2) locating the external project that creates each lib file,
    3) adding a reference to the external projects to our project A.

    We rely on some important assumptions to get this done:
    1) project names and project file names are unique across the codebase,
    2) the structure of the source code hierarchies uses a compatible architecture,
       (which is?)

    Nothing of the above is implemented yet; the method only reports that.
    """
    # NOTE(review): the def line was missing from this listing; the name is
    # taken from the call in the script's main section.
    print("repair is unimplemented")
def extract_xml_tag(self, file_line, tag_name):
    """ locates an XML tag with "tag_name" and returns the contents of the tag.

    this currently assumes that the start tag, contents, and end tag are all on the same
    line of text (which is not a very good assumption in general).  an empty string is
    returned when the tag or its closing tag is not found on the line.
    """
    # NOTE(review): the start tag is matched by prefix, so asking for "Foo"
    # also matches a "<FooBar>" element.
    tag_pos = file_line.find('<' + tag_name)
    if tag_pos < 0:
        return ""  # failed to find anything relevant.
    # find the greater than character that ends *this* start tag; searching
    # from the tag keeps an earlier element on the line from being used.
    gt_pos = file_line.find('>', tag_pos)
    if gt_pos < 0:
        return ""  # fail.
    tag_close_pos = file_line.find('</' + tag_name, gt_pos + 1)
    if tag_close_pos < 0:
        return ""  # fail.
    return file_line[gt_pos + 1:tag_close_pos]
def extract_xml_attribute(self, file_line, tag_name, attribute_name):
    """ locates an XML tag with "tag_name" and returns the value of the "attribute_name".

    the value is the text between the first pair of double quotes that follows the
    attribute name.  an empty string is returned when the tag, the attribute or the
    quotes cannot be found on the line.
    """
    tag_pos = file_line.find('<' + tag_name)
    if tag_pos < 0:
        return ""
    # only look behind the tag name, so that an attribute of the same name on
    # an earlier element of the line is not picked up.
    attrib_pos = file_line.find(attribute_name, tag_pos + 1 + len(tag_name))
    if attrib_pos < 0:
        return ""
    # find the first quote around the attribute.
    quote_pos = file_line.find('"', attrib_pos)
    if quote_pos < 0:
        return ""  # fail.
    quote_close_pos = file_line.find('"', quote_pos + 1)
    if quote_close_pos < 0:
        return ""  # fail.
    return file_line[quote_pos + 1:quote_close_pos]
def extract_xml_tag_from_file(self, filename, tag_name):
    """ reads in a file and extracts the contents of a particular XML tag.

    the contents of the first line that yields a non-empty value for "tag_name" are
    returned; an empty string is returned when the file cannot be read or no line
    contains the tag.

    may not want a file read here.  better to have a nice established way for
    dealing with the existing buffer.
    """
    if not self.read_file_data(filename):
        return ""  # nothing trustworthy to scan.
    for next_line in self.file_linesfile_lines:
        # hmmm: maybe bad assumption thinking all on one line...
        found = self.extract_xml_tag(next_line, tag_name)
        if found != "":
            return found
    return ""  # failed to find anything relevant.
def extract_guid_from_project_file(self, filename):
    """ reads in a visual studio project file and figures out that project's GUID.

    the GUID is whatever extract_xml_tag_from_file() finds for the ProjectGuid tag.
    note that this will fail horribly if the project has been messed with and is no
    longer in microsoft's official format.
    """
    return self.extract_xml_tag_from_file(filename, 'ProjectGuid')
def extract_filename_from_project_reference(self, file_line):
    """ given a ProjectReference line, this pulls out just the filename involved.

    the filename is the value of the Include attribute, as found by
    extract_xml_attribute(); an empty string means the line is not a reference.
    """
    # NOTE(review): the def line was missing from this listing; the name is
    # taken from the call in find_all_project_references().
    return self.extract_xml_attribute(file_line, "ProjectReference", "Include")
def find_all_project_references(self, filename):
    """ reads in a visual studio project file and locates all references.

    returns the list of referenced project filenames in file order; the list is
    empty when the file cannot be read or holds no references.
    """
    if not self.read_file_data(filename):
        return []  # nothing trustworthy to scan.
    refs_list = []
    for next_line in self.file_linesfile_lines:
        ref = self.extract_filename_from_project_reference(next_line)
        if ref != '':
            refs_list.append(ref)
    return refs_list
def parse_dependency_line(self, line):
    """ given an AdditionalDependencies line, this finds the libs listed.

    returns the semicolon separated entries of the tag as a list, without the
    '%(AdditionalDependencies)' placeholder, wherever it appears.  an empty list is
    returned when the line has no usable AdditionalDependencies tag.
    """
    just_libs = self.extract_xml_tag(line, "AdditionalDependencies")
    if just_libs == "":
        return []
    # filter into a fresh list; deleting entries from the list while walking
    # it by index skips entries and can run off the end.
    return [lib for lib in just_libs.split(';') if lib != '%(AdditionalDependencies)']
366  #hmmm: could be a problem if the debug and release values differ.
def extract_dependencies(self, filename):
    """ reads in a visual studio project file and locates all dependencies.

    This will produce a list of the lib files used by c++ projects.  These
    are what we need to link up to their providing projects, if they're
    actually things that we build.

    Only the first line mentioning AdditionalDependencies is used (which could
    be a problem if the debug and release values differ).  An empty list is
    returned when the file cannot be read or has no such line.
    """
    if not self.read_file_data(filename):
        return []  # nothing trustworthy to scan.
    for next_line in self.file_linesfile_lines:
        if 'AdditionalDependencies' in next_line:
            return self.parse_dependency_line(next_line)
    return []  # failed to find right line; same type as the success case.
388 #hmmm: could also be a problem if the debug and release values differ.
def find_asset_created(self, filename):
    """ determines the asset created by a visual studio project file.

    This probably only works right on c++ projects.  It will figure out the
    item being created by the project using the breadcrumbs provided: the
    RootNamespace tag supplies the name, and the ConfigurationType (or
    OutputType) tag decides the file extension.

    Returns the asset's file name, or an empty string when the file cannot be
    read, the tags are not both found, the project is a Utility, or the
    configuration type is not recognized (a message is printed in that case).
    """
    # file extension produced by each known configuration / output type.
    extensions = {
        "DynamicLibrary": ".dll",
        "Library": ".dll",
        "StaticLibrary": ".lib",
        "Application": ".exe",
        "WinExe": ".exe",
        "Exe": ".exe",
    }

    # these will need to be filled for us to have correctly determined what the project creates.
    project_name = ""
    config_type = ""

    if not self.read_file_data(filename):
        return ""  # nothing trustworthy to scan.
    for next_line in self.file_linesfile_lines:
        # we need the project name; only RootNamespace is consulted for it.
        if project_name == "":
            project_name = self.extract_xml_tag(next_line, "RootNamespace")

        # we look for the ConfigurationType (or OutputType), which tells us:
        #   DynamicLibrary | Library | StaticLibrary | Application | WinExe | Exe | Utility
        if config_type == "":
            config_type = self.extract_xml_tag(next_line, "ConfigurationType")
        if config_type == "":
            config_type = self.extract_xml_tag(next_line, "OutputType")

        # decide as soon as both pieces are known, on the very line that
        # completed them, so a tag on the file's last line still counts.
        if (config_type == "") or (project_name == ""):
            continue
        if config_type == "Utility":
            return ""  # utility projects create nothing we can reference.
        suffix = extensions.get(config_type)
        if suffix is None:
            print("unknown configuration type: " + config_type + "\nin proj file: " + filename)
            return ""
        # we think we're successful in figuring out what should be created.
        return project_name + suffix

    return ""  # failed to find right lines.
def walk_directory_for_projects(self, dir):
    """ traverses the directory in "dir" and finds all the project files.

    the project files are returned as a massive list of full paths.  a file counts
    as a project file when its name ends in "proj", ignoring case.  CVS and .svn
    directories are never visited.
    """
    # NOTE(review): the def line was missing from this listing; the name is
    # taken from the call in __init__ and the parameter from its use below.

    # hmmm: important optimization for walk; if the file where we store these things exists,
    #       read directly out of that file instead of redoing hierarchy.  people can
    #       delete the file when they want a fresh look.

    to_return = []
    for root, dirs, files in os.walk(dir):
        # prune in place so that os.walk() skips every version control
        # directory, also when several kinds sit side by side.
        dirs[:] = [name for name in dirs if name not in ('CVS', '.svn')]
        for curr_file in files:
            # see if the file has the right ending to be a visual studio project file.
            if curr_file.lower().endswith('proj'):
                # now add this good one to the returned list of projects.
                to_return.append(os.path.join(root, curr_file))
    return to_return
def find_relative_path(self, our_path, project_file):
    """ calculates path between directory at "our_path" to the location of "project_file".

    this assumes that the locations are actually rooted at the same place; otherwise there is
    no path between the locations.  the location at "our_path" is considered to be the source,
    or where we start out.  the location for "project_file" is the target location.

    the result uses forward slashes and names the directory that holds "project_file",
    not the file itself.  the computed dot-dot prefix is also printed.
    """
    # canonicalize these to be linux paths.  we want to be able to split on forward slashes.
    # empty pieces (from an empty path, a trailing slash or a doubled slash)
    # are dropped so that they are not counted as directories to climb out of.
    source_parts = [piece for piece in our_path.replace('\\', '/').split('/') if piece]
    target_parts = project_file.replace('\\', '/').split('/')
    # remove last item, which should be project filename.
    target_parts.pop()
    target_parts = [piece for piece in target_parts if piece]
    # skip identical leading elements until items don't match or one path is gone.
    shared = 0
    while (shared < len(source_parts) and shared < len(target_parts)
           and source_parts[shared] == target_parts[shared]):
        shared += 1
    # we compute the directory prefix of dot-dots based on the number of elements left
    # in the source path.
    prefix = "../" * (len(source_parts) - shared)
    print("calculated a prefix of: " + prefix)
    return prefix + "/".join(target_parts[shared:])
def locate_all_assets(self):
    """ locates every project file in our list and determines the asset created by it.

    this returns a dictionary of {asset=project} items.  we index by asset way more frequently
    than by project, so the asset name is used as our key.

    projects for which no asset can be determined are dropped from the stored project
    list, so that it only holds projects that appear in the returned dictionary.
    """
    to_return = {}
    useful_projects = []
    for proj in self.projectsprojects:
        asset_found = self.find_asset_created(proj)
        # make sure we don't record a bogus project with no useful asset.
        if asset_found == "":
            continue
        useful_projects.append(proj)
        to_return[asset_found] = proj
    # replace the contents only after the loop; removing entries from the list
    # while it is being iterated makes the loop skip the following entry.
    self.projectsprojects[:] = useful_projects
    return to_return
def locate_referenced_projects(self, project):
    """ finds all the libraries needed by the "project" file and returns their project files.

    each dependency is looked up in the known assets under its own name first; a name
    ending in ".lib" that is not known is then retried as a ".dll".  dependencies that
    none of our projects create are left out of the result.
    """
    to_return = []
    # find the libraries and such used by this project.
    libs = self.extract_dependencies(project)
    # now find who creates those things.
    for current in libs:
        candidates = [current]
        if current.endswith(".lib"):
            # if we don't find the thing with its current name, we'll see if we can
            # convert it into a dll and look for that instead.
            candidates.append(current[:-4] + ".dll")
        for candidate in candidates:
            if candidate in self.assetsassets:
                # this item exists and is created by one of our projects.
                to_return.append(self.assetsassets[candidate])
                break
    return to_return
def remove_redundant_references(self, project):
    """ cleans out any references in "project" to assets that we intend to update.

    this is meant to actually modify the file, so it had better be right.
    it is not implemented yet: calling it currently does nothing at all.
    """
    # planned steps:
    #   load file data for the thing
    #   find references
    #     see if reference is one we know about
    #     if so, zap it out of file contents
    #   write file back out
def unit_test(self):
    """ a sort-of unit test for the functions in this script.

    currently geared for manual inspection of the test results.  the project file to
    examine is the first command line argument when one was given; otherwise a
    default path below the FEISTY_MEOW_APEX environment variable is used.
    """
    print("testing some of the methods...")
    test_file = ""
    if len(self.argumentsarguments) > 1:
        test_file = self.argumentsarguments[1]
    if test_file == "":
        # an unset variable must not blow up the string concatenation.
        apex = os.getenv("FEISTY_MEOW_APEX") or ""
        test_file = apex + "/nucleus/applications/nechung/nechung.vcxproj"
    print("test file is: " + test_file)

    guid = self.extract_guid_from_project_file(test_file)
    print("from proj, got a guid of " + guid)

    refs = self.find_all_project_references(test_file)
    print("refs list is: " + " ".join(refs))

    asset = self.find_asset_created(test_file)
    print("our created asset is: " + asset)

    # work on this object's own project list (under the attribute name that
    # __init__ assigns) instead of reaching for a global variable.
    known_projects = self.projectsprojects
    if len(known_projects) > 0:
        rando = random.randint(0, len(known_projects) - 1)
        print("index chosen to examine: {0}".format(rando))

        relpath = self.find_relative_path(os.path.dirname(test_file), known_projects[rando])
        print("found relative path from source:\n " + test_file)
        print("to target:\n " + known_projects[rando])
        print("is this =>\n " + relpath)

    full_refs = self.locate_referenced_projects(test_file)
    print("refs found are:\n" + " ".join(full_refs))

    self.remove_redundant_references(test_file)
    print("we just munged your file! go check it! no references should remain that are in our new list.")
621 #still needed:
622 # remove any existing references that we now have a replacement for.
623 # base this on the basename of the project file? blah.vcxproj already there e.g.
624 # spit out xml form of references for the dependent projects.
625 # put new references into the right place in file.
630 # run the script if we are non-interactive.
if __name__ == "__main__":
    import sys
    fixit = fix_project_references(sys.argv)

    # comment this out when script is working.
    fixit.unit_test()

    print("we're bailing before doing anything real...")
    # sys.exit() is always available; the bare exit() helper is only installed
    # by the site module and is meant for interactive sessions.
    sys.exit(3)

    # not reached until the bail-out above is removed.
    fixit.repair_project_references()
645 # parking lot of things to do in future:
