4 """ A simple replacement tool that honors some C/C++ syntax when replacing.
6 This will take a particular phrase given by the user and find it in a set of
7 documents. That phrase will be replaced when it appears completely, and is not
8 in a C or C++ style comment (// or /* ... */). It also must be clear of any
9 other alphanumeric pollution, and only be surrounded by white space or operation
def __init__(self, argv):
    """Initialize the replacer with a set of arguments to work with.

    The arguments need to be in the form described by print_instructions():
    the program name, the phrase to replace, the replacement phrase, and
    then one or more file names.

    Args:
        argv: the argument list, typically sys.argv.  It is only stored
            here; validate_and_consume_command_line() interprets it.
    """
    self.arguments = argv
    # we have three states for the processing: consuming normal code (not
    # within a comment), consuming a single line comment, and consuming a
    # multi-line comment.
    self.EATING_NORMAL_TEXT = 0
    self.EATING_ONELINE_COMMENT = 1
    self.EATING_MULTILINE_COMMENT = 2
def print_instructions(self):
    """Show the instructions for using this class on standard output.

    The program name shown in the usage line is taken from
    self.arguments[0].
    """
    print("""
This script will replace all occurrences of a phrase you specify in a set of files.  The
replacement process will be careful about C and C++ syntax and will not replace occurrences
within comments or which are not "complete" phrases (due to other alpha-numeric characters
that abut the phrase).  The arguments to the script are:

  {0}: PhraseToReplace ReplacementPhrase File1 [File2 ...]

For example, if the phrase to replace is Goop, it will be replaced in these contexts:
  Goop[32]
  molo-Goop
  *Goop
but it will not be found in these contexts:
  // doop de Goop
  rGoop
  Goop23
""".format(self.arguments[0]))
def validate_and_consume_command_line(self):
    """Perform command line argument handling.

    Reads self.arguments and, when they are usable, stores
    self.phrase_to_replace, self.replacement_bit and self.files.

    Returns:
        True when the arguments were accepted, False when there are too
        few of them or the phrase to replace is empty.
    """
    arg_count = len(self.arguments)
    # we need the program name, the phrase, the replacement, and at least
    # one file, since the files are everything from index 3 onwards.
    if arg_count < 4:
        return False
    # an empty phrase matches at every position, so it cannot be replaced
    # in any meaningful way.
    if not self.arguments[1]:
        return False
    self.phrase_to_replace = self.arguments[1]
    self.replacement_bit = self.arguments[2]
    print("got phrase to replace: '{0}' and replacement: '{1}'".format(
        self.phrase_to_replace, self.replacement_bit))
    self.files = self.arguments[3:]
    return True
def read_file_data(self, filename):
    """Load the file into our memory buffer for processing.

    The file is read in binary mode and split into lines with
    splitlines(), so the stored lines carry no line terminators.

    Args:
        filename: path of the file to load.

    Returns:
        True when self.file_lines was filled in, False when the file could
        not be opened or read (a message is printed and self.file_lines is
        left untouched).
    """
    try:
        our_file = open(filename, "rb")
    except (IOError, OSError):
        print("There was an error opening the file {0}".format(filename))
        return False
    # the with block closes the handle whether or not the read succeeds.
    with our_file:
        try:
            file_buffer = our_file.read()
        except (IOError, OSError):
            print("There was an error reading the file {0}".format(filename))
            return False
    self.file_lines = file_buffer.splitlines()
    return True
def write_file_data(self, filename):
    """Take the processed buffer and send it back out to the filename.

    Args:
        filename: path of the file to overwrite with self.processed_buffer.

    Returns:
        True when the buffer was written, False when the file could not be
        opened or written (a message is printed).
    """
    output_filename = filename
    # the file is opened in binary mode, so text has to become bytes.  this
    # is done before opening so that a conversion problem cannot leave the
    # target file truncated.
    payload = self.processed_buffer
    if not isinstance(payload, bytes):
        payload = payload.encode("utf-8")
    try:
        our_file = open(output_filename, "wb")
    except (IOError, OSError):
        print("There was an error opening the file {0}".format(output_filename))
        return False
    # the with block closes the handle whether or not the write succeeds.
    with our_file:
        try:
            our_file.write(payload)
        except (IOError, OSError):
            print("There was an error writing the file {0}".format(output_filename))
            return False
    return True
def is_alphanumeric(self, check_char):
    """Report whether a character could be part of an identifier.

    Only the first character of check_char is examined.

    Args:
        check_char: the string whose first character is tested.

    Returns:
        True if that character is an underscore or lies in a-z, A-Z or
        0-9; False otherwise, including for an empty string.
    """
    if not check_char:
        return False
    first = check_char[0]
    return (
        first == "_"
        or "a" <= first <= "z"
        or "A" <= first <= "Z"
        or "0" <= first <= "9"
    )
def replace_within_string(self, fix_string):
    """Replace all appropriate occurrences of the phrase in a string.

    An occurrence of self.phrase_to_replace is swapped for
    self.replacement_bit only when the characters directly before and
    directly after it are both rejected by self.is_alphanumeric().  The
    start and the end of the string count as clear boundaries.  The
    neighbours are judged against the text as it was passed in, and the
    inserted replacement text is never searched again.

    Args:
        fix_string: the text to process.

    Returns:
        The processed form of the text.
    """
    phrase = self.phrase_to_replace
    if not phrase:
        # an empty phrase would match at every position; leave text alone.
        return fix_string
    phrase_length = len(phrase)
    pieces = []      # finished output, joined once at the end.
    copied_to = 0    # everything before this index is already in pieces.
    search_from = 0
    while True:
        # locate next occurrence of the phrase, if any.
        found = fix_string.find(phrase, search_from)
        if found < 0:
            break
        after_index = found + phrase_length
        # "?" is a simple default that won't fail our check.
        char_before = fix_string[found - 1] if found > 0 else "?"
        char_after = fix_string[after_index] if after_index < len(fix_string) else "?"
        if self.is_alphanumeric(char_before) or self.is_alphanumeric(char_after):
            # the phrase is glued to other identifier characters here, so
            # keep skipping forward.
            search_from = found + 1
            continue
        # this looks like a good candidate for replacement.
        pieces.append(fix_string[copied_to:found])
        pieces.append(self.replacement_bit)
        copied_to = after_index
        search_from = after_index
    pieces.append(fix_string[copied_to:])
    return "".join(pieces)
def emit_normal_accumulator(self):
    """Handle emission of a chunk of normal code (without comments).

    The text gathered in self.normal_accumulator is passed through
    replace_within_string(), appended to self.processed_buffer, and the
    accumulator is then cleared.
    """
    # process the text to perform the replacement...
    rewritten = self.replace_within_string(self.normal_accumulator)
    # then send the text into our main buffer; we're done looking at it.
    self.processed_buffer += rewritten
    self.normal_accumulator = ""
def emit_comment_accumulator(self):
    """Emit the piled up text for comments found in the code.

    Comment text is appended to self.processed_buffer exactly as gathered
    (no replacement is attempted), and the accumulator is then cleared.
    """
    self.processed_buffer += self.comment_accumulator
    self.comment_accumulator = ""
def process_file_data(self):
    """Iterate through the stored version of the file and replace the phrase.

    Each line in self.file_lines is split into normal code and comment
    text.  Normal code is gathered in self.normal_accumulator and passed
    on through emit_normal_accumulator(); comment text (from // to the end
    of the line, or from /* through the matching */) is gathered in
    self.comment_accumulator and passed on through
    emit_comment_accumulator().  Code that follows a */ on the same line
    is treated as normal code again.  Every line is given a "\\n"
    terminator in the output.

    Lines held as bytes are decoded as UTF-8 (undecodable bytes are
    preserved with the surrogateescape handler) and the finished
    self.processed_buffer is encoded back to bytes in the same way.

    self.file_lines is empty once this returns.

    Returns:
        True once the file has been consumed.  An unclosed multi-line
        comment only produces a printed warning; its text is still kept.
    """
    self.state = self.EATING_NORMAL_TEXT
    # clear out any previously processed text.
    self.processed_buffer = ""  # reset our new version of the file contents.
    self.normal_accumulator = ""
    self.comment_accumulator = ""

    # take ownership of the lines; the stored list ends up consumed.
    pending_lines = self.file_lines
    self.file_lines = []
    saw_bytes = False

    # iterate through the file's lines.
    for raw_line in pending_lines:
        if isinstance(raw_line, bytes) and not isinstance(raw_line, str):
            # binary-mode lines cannot be searched with text phrases, so
            # work on a lossless text form of them.
            saw_bytes = True
            remainder = raw_line.decode("utf-8", "surrogateescape")
        else:
            remainder = raw_line
        # where the hunt for "*/" may begin within remainder; it is moved
        # past a freshly found "/*" so that "/*/" is not seen as closed.
        scan_from = 0
        while True:
            if self.state == self.EATING_MULTILINE_COMMENT:
                closer = remainder.find("*/", scan_from)
                if closer < 0:
                    # the comment runs on past the end of this line.
                    self.comment_accumulator += remainder + "\n"
                    break
                # the comment is completed on this line; whatever follows
                # it is examined again as normal text.
                self.comment_accumulator += remainder[:closer + 2]
                self.emit_comment_accumulator()
                self.state = self.EATING_NORMAL_TEXT
                remainder = remainder[closer + 2:]
                scan_from = 0
                continue

            # normal text: find the earliest comment opener, if any.
            openers = [
                position
                for position in (remainder.find("//"), remainder.find("/*"))
                if position >= 0
            ]
            if not openers:
                self.normal_accumulator += remainder + "\n"
                break
            opener = min(openers)
            # get last tidbit before comment start and handle pent-up text.
            self.normal_accumulator += remainder[:opener]
            self.emit_normal_accumulator()
            remainder = remainder[opener:]  # keep only the stuff starting at slash.
            if remainder.startswith("//"):
                # a one-line comment swallows the rest of the line.
                self.state = self.EATING_ONELINE_COMMENT
                self.comment_accumulator += remainder + "\n"
                self.emit_comment_accumulator()
                self.state = self.EATING_NORMAL_TEXT
                break
            self.state = self.EATING_MULTILINE_COMMENT
            scan_from = 2

    # verify we're not in the wrong state still.
    if self.state == self.EATING_MULTILINE_COMMENT:
        print("file seems to have unclosed multi-line comment.")
        # the unfinished comment is still part of the file; keep it.
        self.emit_comment_accumulator()
    # last step is to spit out whatever was trailing in the accumulator.
    self.emit_normal_accumulator()
    if saw_bytes:
        # hand back the same kind of data that was read in.
        self.processed_buffer = self.processed_buffer.encode("utf-8", "surrogateescape")
    # if we got to here, we seem to have happily consumed the file.
    return True
def replace_all_occurrences(self):
    """Orchestrate the process of replacing the phrases.

    The command line is validated first; when that fails the instructions
    are printed and nothing else happens.  Otherwise each file in
    self.files is read, processed and written back in turn.  A file whose
    read or processing step reports False is skipped, and the remaining
    files are still handled.

    Returns:
        False when the command line was rejected, True once every file has
        been attempted.
    """
    # process our command line arguments to see what we need to do.
    if not self.validate_and_consume_command_line():
        print("failed to process the command line...\n")
        self.print_instructions()
        return False
    # iterate through the list of files we were given and process them.
    for index, current_file in enumerate(self.files):
        print("file {0} is '{1}'".format(index, current_file))
        if self.read_file_data(current_file) is False:
            print("skipping since file read failed on: {0}".format(current_file))
            continue
        if self.process_file_data() is False:
            print("skipping, since processing failed on: {0}".format(current_file))
            continue
        if self.write_file_data(current_file) is False:
            print("writing file back failed on: {0}".format(current_file))
    print("finished processing all files.")
    return True
if __name__ == "__main__":
    # script entry point: hand the raw command line to the replacer and let
    # it validate the arguments and process every listed file.
    import sys

    slicer = phrase_replacer(sys.argv)
    slicer.replace_all_occurrences()
# parking lot of things to do in future:
#
# TODO: sometimes one DOES want to replace within comments, so ignoring the
#   text inside comments should become an optional behavior.
253 # hmmm: one little issue here is if the text to be replaced happens to reside on
254 # the same line after a multi-line comment. we are okay with ignoring that
255 # possibility for now since it seems brain-dead to write code that way.