Merge pull request #49 from Snawoot/1337-diff-improvements

1337-diff improvements
This commit is contained in:
Vladislav Yarmak 2019-02-06 16:12:36 +02:00 committed by GitHub
commit 5adcc24a94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -22,6 +22,11 @@ class LengthMismatchException(ByteDiffException):
pass pass
class DiffLimitException(ByteDiffException):
    """ Raised when the number of differences exceeds the allowed limit """
    pass
def check_positive_int(value): def check_positive_int(value):
value = int(value) value = int(value)
if value <= 0: if value <= 0:
@ -64,31 +69,43 @@ def feed_chunks(f, chunk_size=4096):
yield buf yield buf
def zip_files_bytes(left, right):
    """ Iterate over two files in lockstep, yielding pairs of elements.

    Each file is read through ``feed_chunks`` and its chunks are flattened
    into a single stream; the two streams are then paired position by
    position.

    Raises LengthMismatchException as soon as one stream is exhausted
    while the other still has data, i.e. the inputs differ in length. """
    # Unique sentinel: nothing read from a file can be identical to it,
    # so an identity check reliably detects the shorter input.
    end_marker = object()
    left_iter = itertools.chain.from_iterable(feed_chunks(left))
    right_iter = itertools.chain.from_iterable(feed_chunks(right))
    pairs = itertools.zip_longest(left_iter,
                                  right_iter,
                                  fillvalue=end_marker)
    for a, b in pairs:
        if a is end_marker or b is end_marker:
            raise LengthMismatchException("Length of input files inequal.")
        yield a, b
def diff(left, right, limit=None):
    """ Yield ``(offset, a, b)`` for every position where inputs differ.

    ``offset`` is the zero-based position of the pair in the streams
    produced by ``zip_files_bytes``; ``a`` and ``b`` are the differing
    elements from ``left`` and ``right`` respectively.

    If ``limit`` is not None, at most ``limit`` differences are yielded;
    encountering one more raises DiffLimitException. Exceptions raised by
    ``zip_files_bytes`` propagate unchanged. """
    diff_count = 0
    for offset, (a, b) in enumerate(zip_files_bytes(left, right)):
        if a == b:
            continue
        diff_count += 1
        # Check before yielding so the difference that breaks the limit
        # is never reported to the caller.
        if limit is not None and diff_count > limit:
            raise DiffLimitException()
        yield offset, a, b
def compose_diff_file(orig, patched, output, header, *,
                      limit=None, offset_adjustment=True):
    """ Write a header line followed by one line per differing position.

    ``orig`` and ``patched`` are the inputs handed to ``diff``; ``output``
    is any object with a ``write`` method. ``header`` is a string that is
    encoded as latin-1 and substituted into HEADER_FORMAT.

    ``limit`` is forwarded to ``diff`` (None means no limit). When
    ``offset_adjustment`` is true, OFFSET_ADJUSTMENT is added to every
    reported offset.

    Exceptions raised by ``diff`` propagate to the caller. Writing is
    incremental, so by then the header and any lines already produced
    have been written to ``output``. """
    output.write(HEADER_FORMAT % (header.encode('latin-1'),))
    # Resolve the adjustment once instead of re-deciding it on every line.
    adj = OFFSET_ADJUSTMENT if offset_adjustment else 0
    for offset, a, b in diff(orig, patched, limit=limit):
        output.write(LINE_FORMAT % (offset + adj, a, b))
def main(): def main():
@ -109,10 +126,14 @@ def main():
open(args.patched_file, 'rb') as patched,\ open(args.patched_file, 'rb') as patched,\
open(output_filename, 'wb') as output: open(output_filename, 'wb') as output:
try: try:
compose_diff_file(orig, patched, output, header_filename) compose_diff_file(orig, patched, output, header_filename,
limit=args.limit)
except LengthMismatchException: except LengthMismatchException:
print("Input files have inequal length. Aborting...", print("Input files have inequal length. Aborting...",
file=sys.stderr) file=sys.stderr)
except DiffLimitException:
print("Differences limit hit. Aborting...",
file=sys.stderr)
if __name__ == '__main__': if __name__ == '__main__':