@ -78,24 +78,53 @@ def get_symlink_files():
ret . append ( f . decode ( ' utf-8 ' ) . split ( " \t " ) [ 1 ] )
ret . append ( f . decode ( ' utf-8 ' ) . split ( " \t " ) [ 1 ] )
return ret
return ret
def _hash_cat_file_blob(proc, blob):
    """Return the SHA-512 hex digest of one blob read from `git cat-file --batch`.

    `proc` is a running `git cat-file --batch` Popen object with stdin and
    stdout pipes; `blob` is the blob id as bytes.

    Raises IOError if the reply header does not describe the requested blob,
    or if the stream ends before the announced number of bytes (plus the
    trailing LF) has been read.
    """
    # request blob
    proc.stdin.write(blob + b'\n')
    proc.stdin.flush()
    # read header: blob id, "blob", size
    reply = proc.stdout.readline().split()
    if len(reply) != 3 or reply[0] != blob or reply[1] != b'blob':
        raise IOError('Unexpected git cat-file header for %r: %r' % (blob, reply))
    size = int(reply[2])
    # hash the blob data in bounded chunks so large blobs are never held in memory
    digest = hashlib.sha512()
    remaining = size
    while remaining > 0:
        piece = proc.stdout.read(min(65536, remaining))
        if not piece:
            raise IOError('Premature EOF reading git cat-file output')
        digest.update(piece)
        remaining -= len(piece)
    # The LF that follows the blob data must be consumed unconditionally.
    # Doing this read inside an `assert` would skip it under `python -O` and
    # leave the stream out of sync for the next header.
    if proc.stdout.read(1) != b'\n':
        raise IOError('Missing LF after blob data in git cat-file output')
    return digest.hexdigest()


def tree_sha512sum(commit='HEAD'):
    """Return a SHA-512 hex digest summarising every blob in the tree of `commit`.

    The tree is listed recursively with `git ls-tree`. For each entry, in
    sorted name order, the overall hash is fed one line consisting of the
    SHA-512 hex digest of the blob contents, a separator, the entry name as
    reported by git (bytes), and a newline.

    Raises IOError if a tree entry is not a blob, if `git cat-file` returns
    unexpected output, or if it exits with a non-zero status.
    subprocess.CalledProcessError propagates if `git ls-tree` fails.
    """
    # request metadata for entire tree, recursively
    blob_by_name = {}
    listing = subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit])
    for line in listing.splitlines():
        metadata, sep, name = line.partition(b'\t')
        fields = metadata.split()  # perms, 'blob', blobid
        if not sep or len(fields) != 3 or fields[1] != b'blob':
            raise IOError('Unexpected git ls-tree entry: %r' % (line,))
        blob_by_name[name] = fields[2]
    files = sorted(blob_by_name)

    # open connection to git-cat-file in batch mode to request data for all blobs
    # this is much faster than launching it per file
    p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    try:
        overall = hashlib.sha512()
        for f in files:
            dig = _hash_cat_file_blob(p, blob_by_name[f])
            # update overall hash with file hash
            overall.update(dig.encode("utf-8"))
            # NOTE(review): separator assumed to be two spaces (sha512sum
            # layout) -- confirm against the upstream file, since changing it
            # changes every resulting digest.
            overall.update("  ".encode("utf-8"))
            overall.update(f)
            overall.update("\n".encode("utf-8"))
        p.stdin.close()
        if p.wait():
            raise IOError('Non-zero return value executing git cat-file')
    finally:
        # On error paths the child is still running: closing stdin lets it
        # exit, and waiting reaps it so neither the process nor the pipes leak.
        # On the success path these calls are harmless no-ops.
        p.stdin.close()
        p.stdout.close()
        p.wait()
    return overall.hexdigest()