Browse Source

devtools: Make github-merge compute SHA512 from git, instead of worktree

This changes tree_sha512sum() to requests the objects for hashing from
git instead of from the working tree.

The change should make the process more deterministic (it hashes what
will be pushed) and hopefully avoids the frequent miscomputed SHA512's
that happen now.
0.15
Wladimir J. van der Laan 8 years ago
parent
commit
a327e8ea30
  1. 45
      contrib/devtools/github-merge.py

45
contrib/devtools/github-merge.py

@ -78,24 +78,53 @@ def get_symlink_files(): @@ -78,24 +78,53 @@ def get_symlink_files():
ret.append(f.decode('utf-8').split("\t")[1])
return ret
def tree_sha512sum():
files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', '--name-only', 'HEAD']).splitlines())
def tree_sha512sum(commit='HEAD'):
# request metadata for entire tree, recursively
files = []
blob_by_name = {}
for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
name_sep = line.index(b'\t')
metadata = line[:name_sep].split() # perms, 'blob', blobid
assert(metadata[1] == b'blob')
name = line[name_sep+1:]
files.append(name)
blob_by_name[name] = metadata[2]
files.sort()
# open connection to git-cat-file in batch mode to request data for all blobs
# this is much faster than launching it per file
p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
overall = hashlib.sha512()
for f in files:
blob = blob_by_name[f]
# request blob
p.stdin.write(blob + b'\n')
p.stdin.flush()
# read header: blob, "blob", size
reply = p.stdout.readline().split()
assert(reply[0] == blob and reply[1] == b'blob')
size = int(reply[2])
# hash the blob data
intern = hashlib.sha512()
fi = open(f, 'rb')
while True:
piece = fi.read(65536)
if piece:
ptr = 0
while ptr < size:
bs = min(65536, size - ptr)
piece = p.stdout.read(bs)
if len(piece) == bs:
intern.update(piece)
else:
break
fi.close()
raise IOError('Premature EOF reading git cat-file output')
ptr += bs
dig = intern.hexdigest()
assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
# update overall hash with file hash
overall.update(dig.encode("utf-8"))
overall.update(" ".encode("utf-8"))
overall.update(f)
overall.update("\n".encode("utf-8"))
p.stdin.close()
if p.wait():
raise IOError('Non-zero return value executing git cat-file')
return overall.hexdigest()

Loading…
Cancel
Save