scripts/text/phrase_replacer.py

   1 #!/usr/bin/python
   2
   3 class phrase_replacer:
   4   """ A simple replacement tool that honors some C/C++ syntax when replacing.
   5
   6       This will take a particular phrase given by the user and find it in a set of
   7       documents.  That phrase will be replaced when it appears completely, and is not
   8       in a C or C++ style comment (// or /* ... */).  It also must be clear of any
   9       other alphanumeric pollution, and only be surrounded by white space or operation
  10       characters.
  11   """
  12
  13   def __init__(self, argv):
  14     """ Initializes the class with a set of arguments to work with.
  15
  16         The arguments need to be in the form described by print_instructions().
  17     """
  18     self.arguments = argv
  19     # we have three states for the processing: consuming normal code (not within a comment),
  20     # consuming a single line comment, and consuming a multi-line comment.
  21     self.EATING_NORMAL_TEXT = 0
  22     self.EATING_ONELINE_COMMENT = 1
  23     self.EATING_MULTILINE_COMMENT = 2
  24
  25   def print_instructions(self):
  26     """ Shows the instructions for using this class. """
  27     print("""
  28 This script will replace all occurrences of a phrase you specify in a set of files.  The
  29 replacement process will be careful about C and C++ syntax and will not replace occurrences
  30 within comments or which are not "complete" phrases (due to other alpha-numeric characters
  31 that abut the phrase).  The arguments to the script are:
  32
  33   {0}: PhraseToReplace  ReplacementPhrase  File1  [File2 ...]
  34
  35 For example, if the phrase to replace is Goop, it will be replaced in these contexts:
  36   Goop[32]
  37   molo-Goop
  38   *Goop
  39 but it will not be found in these contexts:
  40   // doop de Goop
  41   rGoop
  42   Goop23
  43 """.format(self.arguments[0]))
  44
  45   def validate_and_consume_command_line(self):
  46     """ Performs command line argument handling. """
  47     arg_count = len(self.arguments)
  48 #    for i in range(1, arg_count):
  49 #      print("i is {0}, arg is {1}".format(i, self.arguments[i]))
  50     # we need more than 2 arguments, since there needs to be at least one file also.
  51     if arg_count < 4:
  52       return False
  53     self.phrase_to_replace = self.arguments[1]
  54     self.replacement_bit = self.arguments[2]
  55     print("got phrase to replace: \'{0}\' and replacement: \'{1}\'".format(self.phrase_to_replace, self.replacement_bit))
  56     self.files = self.arguments[3:]
  57     return True
  58
  59   def read_file_data(self, filename):
  60     """ loads the file into our memory buffer for processing. """
  61     try:
  62       our_file = open(filename, "rb")
  63       try:
  64         file_buffer = our_file.read()
  65       except IOError:
  66         print("There was an error reading the file {0}".format(filename))
  67         return False
  68       finally:
  69         our_file.close()
  70     except IOError:
  71       print("There was an error opening the file {0}".format(filename))
  72       return False
  73     self.file_lines = file_buffer.splitlines()
  74     return True
  75
  76   def write_file_data(self, filename):
  77     """ takes the processed buffer and sends it back out to the filename. """
  78 #    output_filename = filename + ".new"  # safe testing version.
  79     output_filename = filename
  80     try:
  81       our_file = open(output_filename, "wb")
  82       try:
  83         file_buffer = our_file.write(self.processed_buffer)
  84       except IOError:
  85         print("There was an error writing the file {0}".format(output_filename))
  86         return False
  87       finally:
  88         our_file.close()
  89     except IOError:
  90       print("There was an error opening the file {0}".format(output_filename))
  91       return False
  92     return True
  93
  94   def is_alphanumeric(self, check_char):
  95     """ given a character, this returns true if it's between a-z, A-Z or 0-9. """
  96     if (check_char[0] == "_"):
  97       return True
  98     if ( (check_char[0] <= "z") and (check_char[0] >= "a")):
  99       return True
 100     if ( (check_char[0] <= "Z") and (check_char[0] >= "A")):
 101       return True
 102     if ( (check_char[0] <= "9") and (check_char[0] >= "0")):
 103       return True
 104     return False
 105
 106   def replace_within_string(self, fix_string):
 107     """ given a string to fix, this replaces all appropriate locations of the phrase. """
 108     indy = 0
 109 #    print("got to replace within string")
 110     while (indy < len(fix_string)):
 111       # locate next occurrence of replacement text, if any.
 112       indy = fix_string.find(self.phrase_to_replace, indy)
 113 #      print("find indy={0}".format(indy))
 114       if (indy > -1):
 115 #        print("found occurrence of replacement string")
 116         # we found an occurrence, but we have to validate it's separated enough.
 117         char_before = "?"  # simple default that won't fail our check.
 118         char_after = "?"
 119         if (indy > 0):
 120           char_before = fix_string[indy-1]
 121         if (indy + len(self.phrase_to_replace) < len(fix_string) - 1):
 122           char_after = fix_string[indy+len(self.phrase_to_replace)]
 123 #        print("char before {0}, char after {1}".format(char_before, char_after))
 124         if (not self.is_alphanumeric(char_before) and not self.is_alphanumeric(char_after)):
 125           # this looks like a good candidate for replacement.
 126           fix_string = "{0}{1}{2}".format(fix_string[0:indy], self.replacement_bit, fix_string[indy+len(self.phrase_to_replace):])
 127 #          print("changed string to: {0}".format(fix_string))
 128       else:
 129         break
 130       indy += 1  # no matches means we have to keep skipping forward.
 131     return fix_string  # give back processed form.
 132
 133   def emit_normal_accumulator(self):
 134     """ handle emission of a chunk of normal code (without comments). """
 135     # process the text to perform the replacement...
 136     self.normal_accumulator = self.replace_within_string(self.normal_accumulator)
 137     # then send the text into our main buffer; we're done looking at it.
 138     self.processed_buffer += self.normal_accumulator
 139     self.normal_accumulator = ""
 140
 141   def emit_comment_accumulator(self):
 142     """ emits the piled up text for comments found in the code. """
 143     self.processed_buffer += self.comment_accumulator
 144     self.comment_accumulator = ""
 145
 146   def process_file_data(self):
 147     """ iterates through the stored version of the file and replaces the phrase. """
 148     self.state = self.EATING_NORMAL_TEXT;
 149     # clear out any previously processed text.
 150     self.processed_buffer = ""   # reset our new version of the file contents.
 151     self.normal_accumulator = ""
 152     self.comment_accumulator = ""
 153     # iterate through the file's lines.
 154     while (len(self.file_lines) > 0):
 155       # get the next line out of the input.
 156       next_line = self.file_lines[0]
 157       # drop that line from the remaining items.
 158       self.file_lines = self.file_lines[1:]
 159 #      print("next line: {0}".format(next_line))
 160       # decide if we need a state transition.
 161       indy = 0
 162       if ((len(next_line) > 0) and (self.state == self.EATING_NORMAL_TEXT) and ('/' in next_line)):
 163         # loop to catch cases where multiple slashes are in line and one IS a comment.
 164         while (indy < len(next_line)):
 165           # locate next slash, if any.
 166           indy = next_line.find('/', indy)
 167           if (indy < 0):
 168             break
 169           if ((len(next_line) > indy + 1) and (next_line[indy + 1] == '/')):
 170             # switch states and handle any pent-up text.
 171             self.normal_accumulator += next_line[0:indy]  # get last tidbit before comment start.
 172             next_line = next_line[indy:]  # keep only the stuff starting at slash.
 173             self.state = self.EATING_ONELINE_COMMENT
 174 #            print("state => oneline comment")
 175             self.emit_normal_accumulator()
 176             break
 177           if ((len(next_line) > indy + 1) and (next_line[indy + 1] == '*')):
 178             # switch states and deal with accumulated text.
 179             self.normal_accumulator += next_line[0:indy]  # get last tidbit before comment start.
 180             next_line = next_line[indy:]  # keep only the stuff starting at slash.
 181             self.state = self.EATING_MULTILINE_COMMENT
 182 #            print("state => multiline comment")
 183             self.emit_normal_accumulator()
 184             break
 185           indy += 1  # no matches means we have to keep skipping forward.
 186
 187       # now handle things appropriately for our current state.
 188       if (self.state == self.EATING_NORMAL_TEXT):
 189         # add the text to the normal accumulator.
 190 #        print("would handle normal text")
 191         self.normal_accumulator += next_line + "\n"
 192       elif (self.state == self.EATING_ONELINE_COMMENT):
 193         # save the text in comment accumulator.
 194 #        print("would handle oneline comment")
 195         self.comment_accumulator += next_line + "\n"
 196         self.emit_comment_accumulator()
 197         self.state = self.EATING_NORMAL_TEXT
 198       elif (self.state == self.EATING_MULTILINE_COMMENT):
 199         # save the text in comment accumulator.
 200 #        print("would handle multiline comment")
 201         self.comment_accumulator += next_line + "\n"
 202         # check for whether the multi-line comment is completed on this line.
 203         if ("*/" in next_line):
 204 #          print("found completion for multiline comment on line.")
 205           self.emit_comment_accumulator()
 206           self.state = self.EATING_NORMAL_TEXT
 207     # verify we're not in the wrong state still.
 208     if (self.state == self.EATING_MULTILINE_COMMENT):
 209       print("file seems to have unclosed multi-line comment.")
 210     # last step is to spit out whatever was trailing in the accumulator.
 211     self.emit_normal_accumulator()
 212     # if we got to here, we seem to have happily consumed the file.
 213     return True
 214
 215   def replace_all_occurrences(self):
 216     """ Orchestrates the process of replacing the phrases. """
 217     # process our command line arguments to see what we need to do.
 218     try_command_line = self.validate_and_consume_command_line()
 219     if (try_command_line != True):
 220       print("failed to process the command line...\n")
 221       self.print_instructions()
 222       exit(1)
 223     # iterate through the list of files we were given and process them.
 224     for i in range(0, len(self.files)):
 225       print("file {0} is \'{1}\'".format(i, self.files[i]))
 226       worked = self.read_file_data(self.files[i])
 227       if (worked is False):
 228         print("skipping since file read failed on: {0}".format(self.files[i]))
 229         continue
 230 #      print("{0} got file contents:\n{1}".format(self.files[i], self.file_lines))
 231       worked = self.process_file_data()
 232       if (worked is False):
 233         print("skipping, since processing failed on: {0}".format(self.files[i]))
 234         continue
 235       worked = self.write_file_data(self.files[i])
 236       if (worked is False):
 237         print("writing file back failed on: {0}".format(self.files[i]))
 238     print("finished processing all files.")
 239
 240
 241 if __name__ == "__main__":
 242     import sys
 243     slicer = phrase_replacer(sys.argv)
 244     slicer.replace_all_occurrences()
 245
 246 ##############
 247
 248 # parking lot of things to do in future:
 249
 250 #hmmm: actually sometimes one DOES want to replace within comments.  argh.
 251 #      make ignoring inside comments an optional thing.  later.
 252
 253 # hmmm: one little issue here is if the text to be replaced happens to reside on
 254 #       the same line after a multi-line comment.  we are okay with ignoring that
 255 #       possibility for now since it seems brain-dead to write code that way.
 256
 257