"""
Project for Week 4 of "Python Data Representations".
Find differences in file contents.

Be sure to read the project description page for further information
about the expected behavior of the program.
"""

# Sentinel used by the diff functions in this module to report that the
# compared inputs contain no differences.
IDENTICAL = -1


def singleline_diff(line1, line2):
    """
    Inputs:
      line1 - first single line string
      line2 - second single line string
    Output:
      Returns the index of the first position at which line1 and
      line2 differ. When one line is a proper prefix of the other,
      that index is the length of the shorter line.

      Returns IDENTICAL if the two lines are the same.
    """
    if line1 == line2:
        return IDENTICAL

    # Only the overlapping part of the two lines can be compared
    # character by character.
    shared = min(len(line1), len(line2))
    for position in range(shared):
        if line1[position] != line2[position]:
            return position

    # The overlap matches, so the lines first differ where the shorter ends.
    return shared


def singleline_diff_format(line1, line2, idx):
    """
    Inputs:
      line1 - first single line string
      line2 - second single line string
      idx   - index at which to indicate difference
    Output:
      Returns a three line formatted string showing the location
      of the first difference between line1 and line2: line1, then a
      marker line made of idx "=" characters followed by "^", then
      line2, each terminated by a newline.

      If either input line contains a newline or carriage return,
      then returns an empty string.

      If idx is not a valid index (negative, or greater than the
      length of the shorter line), then returns an empty string.
    """
    # Input that spans several lines cannot be shown in the three line layout.
    for line in (line1, line2):
        if "\n" in line or "\r" in line:
            return ""

    # idx may equal the length of the shorter line: that marks the position
    # just past its end, where the longer line continues.
    if idx < 0 or idx > min(len(line1), len(line2)):
        return ""

    marker = "=" * idx + "^"
    return line1 + "\n" + marker + "\n" + line2 + "\n"


def multiline_diff(lines1, lines2):
    """
    Inputs:
      lines1 - list of single line strings
      lines2 - list of single line strings
    Output:
      Returns a tuple containing the line number (starting from 0) and
      the index in that line where the first difference between lines1
      and lines2 occurs.

      If every line the two lists share is equal but one list has more
      lines, the difference is reported at index 0 of the first line
      that exists in only one of the lists.

      Returns (IDENTICAL, IDENTICAL) if the two lists are the same.
    """
    # Walk the lines both lists have in common and stop at the first
    # mismatch, so an early difference is never masked by later lines.
    for line_number, (first, second) in enumerate(zip(lines1, lines2)):
        if first != second:
            return (line_number, singleline_diff(first, second))

    # All shared lines match; a longer list differs at the start of its
    # first extra line.
    if len(lines1) != len(lines2):
        return (min(len(lines1), len(lines2)), 0)

    return (IDENTICAL, IDENTICAL)


def get_file_lines(filename):
    """
    Inputs:
      filename - name of file to read
    Output:
      Returns a list of lines from the file named filename.  Each
      line will be a single line string with no newline ('\\n') or
      return ('\\r') characters.  Other trailing whitespace (spaces,
      tabs) is kept, since it is part of the line's content.

      If the file does not exist or is not readable, then the
      behavior of this function is undefined.
    """
    # The with-statement closes the file even if reading raises.
    with open(filename, "rt") as text_file:
        # Strip only line terminators; a bare rstrip() would also remove
        # trailing spaces and tabs and hide real differences.
        return [line.rstrip("\r\n") for line in text_file]


def file_diff_format(filename1, filename2):
    """
    Inputs:
      filename1 - name of first file
      filename2 - name of second file
    Output:
      Returns a four line string showing the location of the first
      difference between the two files named by the inputs: a
      "Line N:" header followed by the three line output of
      singleline_diff_format for that line.

      If the first difference is on a line that exists in only one
      of the files, the missing line is treated as an empty string.

      If the files are identical, the function instead returns the
      string "No differences\\n".

      If either file does not exist or is not readable, then the
      behavior of this function is undefined.
    """
    lines1 = get_file_lines(filename1)
    lines2 = get_file_lines(filename2)
    location = multiline_diff(lines1, lines2)

    if location == (IDENTICAL, IDENTICAL):
        return "No differences\n"

    line_number, index = location

    # One file may end before the differing line; use an empty line for it
    # instead of indexing past the end of the shorter list.
    line1 = lines1[line_number] if line_number < len(lines1) else ""
    line2 = lines2[line_number] if line_number < len(lines2) else ""

    return ("Line " + str(line_number) + ":\n"
            + singleline_diff_format(line1, line2, index))