# feisty meow concerns codebase 2.140
# phrase_replacer.py
# (source listing taken from the generated documentation page for this file.)
#!/usr/bin/python


class phrase_replacer(object):
    """A simple replacement tool that honors some C/C++ syntax when replacing.

    This will take a particular phrase given by the user and find it in a set of
    documents.  That phrase will be replaced when it appears completely, and is not
    in a C or C++ style comment (// or /* ... */).  It also must be clear of any
    other alphanumeric pollution, and only be surrounded by white space or operation
    characters.

    Known limitations of the scanner:
      * comment markers inside string literals are treated as real comments.
      * text following the end of a multi-line comment on the same line is
        treated as part of the comment and is never replaced.
      * files are rewritten with "\\n" line endings and a trailing newline.
    """

    def __init__(self, argv):
        """Initializes the class with a set of arguments to work with.

        The arguments need to be in the form described by print_instructions():
        argv[0] is the program name, argv[1] the phrase to replace, argv[2] the
        replacement, and argv[3:] the files to process.
        """
        self.arguments = argv
        # we have three states for the processing: consuming normal code (not
        # within a comment), consuming a single line comment, and consuming a
        # multi-line comment.
        self.EATING_NORMAL_TEXT = 0
        self.EATING_ONELINE_COMMENT = 1
        self.EATING_MULTILINE_COMMENT = 2
        # working state; given defaults so methods can be called in any order.
        self.phrase_to_replace = ""
        self.replacement_bit = ""
        self.files = []
        self.file_lines = []
        self.state = self.EATING_NORMAL_TEXT
        self.processed_buffer = ""
        self.normal_accumulator = ""
        self.comment_accumulator = ""

    def print_instructions(self):
        """Shows the instructions for using this class."""
        # fall back to a fixed name if no program name was supplied.
        program_name = self.arguments[0] if self.arguments else "phrase_replacer.py"
        print("""
This script will replace all occurrences of a phrase you specify in a set of files. The
replacement process will be careful about C and C++ syntax and will not replace occurrences
within comments or which are not "complete" phrases (due to other alpha-numeric characters
that abut the phrase). The arguments to the script are:

  {0}: PhraseToReplace ReplacementPhrase File1 [File2 ...]

For example, if the phrase to replace is Goop, it will be replaced in these contexts:
  Goop[32]
  molo-Goop
  *Goop
but it will not be found in these contexts:
  // doop de Goop
  rGoop
  Goop23
""".format(program_name))

    def validate_and_consume_command_line(self):
        """Performs command line argument handling.

        Returns True when the phrase, the replacement and at least one file
        were supplied (and stores them on the object); returns False otherwise.
        """
        # we need more than 2 arguments after the program name, since there
        # needs to be at least one file also.
        if len(self.arguments) < 4:
            return False
        self.phrase_to_replace = self.arguments[1]
        self.replacement_bit = self.arguments[2]
        print("got phrase to replace: '{0}' and replacement: '{1}'".format(
            self.phrase_to_replace, self.replacement_bit))
        self.files = self.arguments[3:]
        return True

    def read_file_data(self, filename):
        """Loads the file into our memory buffer (self.file_lines) for processing.

        Returns True on success; prints a message and returns False when the
        file cannot be opened or read.
        """
        try:
            our_file = open(filename, "rb")
        except IOError:
            print("There was an error opening the file {0}".format(filename))
            return False
        try:
            file_buffer = our_file.read()
        except IOError:
            print("There was an error reading the file {0}".format(filename))
            return False
        finally:
            our_file.close()
        # split while still bytes so only \n, \r and \r\n count as line breaks.
        raw_lines = file_buffer.splitlines()
        if bytes is not str:
            # python 3: the scanner works on text, so decode each line.
            # surrogateescape lets arbitrary bytes survive the round trip.
            raw_lines = [line.decode("utf-8", "surrogateescape") for line in raw_lines]
        self.file_lines = raw_lines
        return True

    def write_file_data(self, filename):
        """Takes the processed buffer and sends it back out to the filename.

        Returns True on success; prints a message and returns False when the
        file cannot be opened or written.
        """
        output_filename = filename
        output_data = self.processed_buffer
        if not isinstance(output_data, bytes):
            # python 3 text buffer: undo the decoding done by read_file_data.
            output_data = output_data.encode("utf-8", "surrogateescape")
        try:
            our_file = open(output_filename, "wb")
        except IOError:
            print("There was an error opening the file {0}".format(output_filename))
            return False
        try:
            our_file.write(output_data)
        except IOError:
            print("There was an error writing the file {0}".format(output_filename))
            return False
        finally:
            our_file.close()
        return True

    def is_alphanumeric(self, check_char):
        """Reports whether the first character is an identifier character.

        Returns True for an underscore or an ASCII a-z, A-Z or 0-9, and False
        for anything else, including an empty string.
        """
        if not check_char:
            return False
        first = check_char[0]
        return (first == "_"
                or "a" <= first <= "z"
                or "A" <= first <= "Z"
                or "0" <= first <= "9")

    def replace_within_string(self, fix_string):
        """Given a string to fix, this replaces all appropriate locations of the phrase.

        An occurrence is replaced only when the characters immediately before
        and after it are not alphanumeric (per is_alphanumeric).  Returns the
        processed string; an empty phrase leaves the string unchanged.
        """
        phrase = self.phrase_to_replace
        if not phrase:
            return fix_string
        replacement = self.replacement_bit
        indy = 0
        while indy < len(fix_string):
            # locate next occurrence of the phrase, if any.
            indy = fix_string.find(phrase, indy)
            if indy < 0:
                break
            # we found an occurrence, but have to validate it's separated enough.
            char_before = "?"  # simple default that won't fail our check.
            char_after = "?"
            if indy > 0:
                char_before = fix_string[indy - 1]
            end_of_match = indy + len(phrase)
            if end_of_match < len(fix_string):
                char_after = fix_string[end_of_match]
            if self.is_alphanumeric(char_before) or self.is_alphanumeric(char_after):
                indy += 1  # not a complete phrase; keep skipping forward.
                continue
            # this is a good candidate for replacement.
            fix_string = fix_string[:indy] + replacement + fix_string[end_of_match:]
            # resume after the inserted text so a replacement that contains the
            # phrase is never scanned again.
            indy += len(replacement)
        return fix_string

    def emit_normal_accumulator(self):
        """Handle emission of a chunk of normal code (without comments)."""
        # process the text to perform the replacement...
        self.normal_accumulator = self.replace_within_string(self.normal_accumulator)
        # then send the text into our main buffer; we're done looking at it.
        self.processed_buffer += self.normal_accumulator
        self.normal_accumulator = ""

    def emit_comment_accumulator(self):
        """Emits the piled up text for comments found in the code, unmodified."""
        self.processed_buffer += self.comment_accumulator
        self.comment_accumulator = ""

    def _find_comment_start(self, line):
        """Returns (index, state) for the first '//' or '/*' in line, or (-1, normal state)."""
        indy = line.find("/")
        while indy > -1:
            opener = line[indy:indy + 2]
            if opener == "//":
                return indy, self.EATING_ONELINE_COMMENT
            if opener == "/*":
                return indy, self.EATING_MULTILINE_COMMENT
            # a lone slash (e.g. division); look for the next one.
            indy = line.find("/", indy + 1)
        return -1, self.EATING_NORMAL_TEXT

    def process_file_data(self):
        """Iterates through the stored version of the file and replaces the phrase.

        Consumes self.file_lines and leaves the result in self.processed_buffer.
        Always returns True.
        """
        self.state = self.EATING_NORMAL_TEXT
        # clear out any previously processed text.
        self.processed_buffer = ""
        self.normal_accumulator = ""
        self.comment_accumulator = ""
        # the stored lines are consumed by processing.
        pending_lines = self.file_lines
        self.file_lines = []
        for next_line in pending_lines:
            # where to start looking for "*/" on this line.
            closing_search_start = 0
            # decide if we need a state transition.
            if self.state == self.EATING_NORMAL_TEXT:
                comment_start, comment_state = self._find_comment_start(next_line)
                if comment_start > -1:
                    # switch states and handle any pent-up text.
                    self.normal_accumulator += next_line[:comment_start]
                    next_line = next_line[comment_start:]  # keep only the comment.
                    self.state = comment_state
                    self.emit_normal_accumulator()
                    # skip the opener itself so "/*/" is not seen as closed.
                    closing_search_start = 2

            # now handle things appropriately for our current state.
            if self.state == self.EATING_NORMAL_TEXT:
                self.normal_accumulator += next_line + "\n"
            elif self.state == self.EATING_ONELINE_COMMENT:
                self.comment_accumulator += next_line + "\n"
                self.emit_comment_accumulator()
                self.state = self.EATING_NORMAL_TEXT
            elif self.state == self.EATING_MULTILINE_COMMENT:
                self.comment_accumulator += next_line + "\n"
                # check for whether the multi-line comment is completed on this line.
                if next_line.find("*/", closing_search_start) > -1:
                    self.emit_comment_accumulator()
                    self.state = self.EATING_NORMAL_TEXT
        # verify we're not in the wrong state still.
        if self.state == self.EATING_MULTILINE_COMMENT:
            print("file seems to have unclosed multi-line comment.")
            # keep the unterminated comment text rather than dropping it.
            self.emit_comment_accumulator()
        # last step is to spit out whatever was trailing in the accumulator.
        self.emit_normal_accumulator()
        # if we got to here, we seem to have happily consumed the file.
        return True

    def replace_all_occurrences(self):
        """Orchestrates the process of replacing the phrases.

        Exits with status 1 (after printing the instructions) when the command
        line is unusable; otherwise processes each file, skipping any that fail.
        """
        # process our command line arguments to see what we need to do.
        if not self.validate_and_consume_command_line():
            print("failed to process the command line...\n")
            self.print_instructions()
            raise SystemExit(1)
        # iterate through the list of files we were given and process them.
        for i, filename in enumerate(self.files):
            print("file {0} is '{1}'".format(i, filename))
            if not self.read_file_data(filename):
                print("skipping since file read failed on: {0}".format(filename))
                continue
            if not self.process_file_data():
                print("skipping, since processing failed on: {0}".format(filename))
                continue
            if not self.write_file_data(filename):
                print("writing file back failed on: {0}".format(filename))
        print("finished processing all files.")


# script entry point: hand the raw command line to the replacer and run it.
if __name__ == "__main__":
    import sys
    slicer = phrase_replacer(sys.argv)
    slicer.replace_all_occurrences()


# parking lot of things to do in future:

# hmmm: actually, sometimes one DOES want to replace within comments.  argh.
#   make ignoring text inside comments an optional thing.  later.

# hmmm: one little issue here is if the text to be replaced happens to reside on
#   the same line after the end of a multi-line comment.  we are okay with ignoring
#   that possibility for now since it seems brain-dead to write code that way.

# cross-reference notes left over from the generated documentation page:
#   #define open -- Definition: Xos2defs.h:36
#   def is_alphanumeric(self, check_char)
#   def read_file_data(self, filename)
#   def write_file_data(self, filename)
#   def replace_within_string(self, fix_string)