Browse Source

contrib/linearize: Add feature to set file's timestamp based on block header time.

0.10
Jeff Garzik 10 years ago committed by Wladimir J. van der Laan
parent
commit
399cdbc700
No known key found for this signature in database
GPG Key ID: 74810B012346C9A6
  1. 5
      contrib/linearize/README.md
  2. 50
      contrib/linearize/linearize-data.py

5
contrib/linearize/README.md

@@ -27,6 +27,7 @@ output.
Optional config file setting for linearize-data:
* "netmagic": network magic number
* "max_out_sz": maximum output file size (default 1000*1000*1000)
* "split_year": Split files when a new year is first seen, in addition to
* "split_timestamp": Split files when a new month is first seen, in addition to
reaching a maximum file size.
* "file_timestamp": Set each file's last-modified time to that of the
most recent block in that file.

50
contrib/linearize/linearize-data.py

@@ -10,11 +10,13 @@
import json
import struct
import re
import os
import base64
import httplib
import sys
import hashlib
import datetime
import time
settings = {}
@@ -60,9 +62,10 @@ def calc_hash_str(blk_hdr):
def get_blk_dt(blk_hdr):
members = struct.unpack("<I", blk_hdr[68:68+4])
dt = datetime.datetime.fromtimestamp(members[0])
nTime = members[0]
dt = datetime.datetime.fromtimestamp(nTime)
dt_ym = datetime.datetime(dt.year, dt.month, 1)
return dt_ym
return (dt_ym, nTime)
def get_block_hashes(settings):
blkindex = []
@@ -87,14 +90,19 @@ def copydata(settings, blkindex, blkset):
outFn = 0
outsz = 0
outF = None
outFname = None
blkCount = 0
lastDate = datetime.datetime(2000, 1, 1)
highTS = 1408893517 - 315360000
timestampSplit = False
fileOutput = True
setFileTime = False
maxOutSz = settings['max_out_sz']
if 'output' in settings:
fileOutput = False
if settings['file_timestamp'] != 0:
setFileTime = True
if settings['split_timestamp'] != 0:
timestampSplit = True
@@ -134,34 +142,41 @@ def copydata(settings, blkindex, blkset):
if not fileOutput and ((outsz + inLen) > maxOutSz):
outF.close()
if setFileTime:
os.utime(outFname, (int(time.time()), highTS))
outF = None
outFname = None
outFn = outFn + 1
outsz = 0
if timestampSplit:
blkDate = get_blk_dt(blk_hdr)
if blkDate > lastDate:
print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
lastDate = blkDate
if outF:
outF.close()
outF = None
outFn = outFn + 1
outsz = 0
(blkDate, blkTS) = get_blk_dt(blk_hdr)
if timestampSplit and (blkDate > lastDate):
print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
lastDate = blkDate
if outF:
outF.close()
if setFileTime:
os.utime(outFname, (int(time.time()), highTS))
outF = None
outFname = None
outFn = outFn + 1
outsz = 0
if not outF:
if fileOutput:
fname = settings['output_file']
outFname = settings['output_file']
else:
fname = "%s/blk%05d.dat" % (settings['output'], outFn)
print("Output file" + fname)
outF = open(fname, "wb")
outFname = "%s/blk%05d.dat" % (settings['output'], outFn)
print("Output file" + outFname)
outF = open(outFname, "wb")
outF.write(inhdr)
outF.write(rawblock)
outsz = outsz + inLen + 8
blkCount = blkCount + 1
if blkTS > highTS:
highTS = blkTS
if (blkCount % 1000) == 0:
print("Wrote " + str(blkCount) + " blocks")
@@ -191,6 +206,8 @@ if __name__ == '__main__':
settings['input'] = 'input'
if 'hashlist' not in settings:
settings['hashlist'] = 'hashlist.txt'
if 'file_timestamp' not in settings:
settings['file_timestamp'] = 0
if 'split_timestamp' not in settings:
settings['split_timestamp'] = 0
if 'max_out_sz' not in settings:
@@ -198,6 +215,7 @@ if __name__ == '__main__':
settings['max_out_sz'] = long(settings['max_out_sz'])
settings['split_timestamp'] = int(settings['split_timestamp'])
settings['file_timestamp'] = int(settings['file_timestamp'])
settings['netmagic'] = settings['netmagic'].decode('hex')
if 'output_file' not in settings and 'output' not in settings:

Loading…
Cancel
Save