Browse Source

contrib/linearize: Add feature to set file's timestamp based on block header time.

0.10
Jeff Garzik 10 years ago committed by Wladimir J. van der Laan
parent
commit
399cdbc700
No known key found for this signature in database
GPG Key ID: 74810B012346C9A6
  1. 5
      contrib/linearize/README.md
  2. 50
      contrib/linearize/linearize-data.py

5
contrib/linearize/README.md

@@ -27,6 +27,7 @@ output.
Optional config file setting for linearize-data: Optional config file setting for linearize-data:
* "netmagic": network magic number * "netmagic": network magic number
* "max_out_sz": maximum output file size (default 1000*1000*1000) * "max_out_sz": maximum output file size (default 1000*1000*1000)
* "split_year": Split files when a new year is first seen, in addition to * "split_timestamp": Split files when a new month is first seen, in addition to
reaching a maximum file size. reaching a maximum file size.
* "file_timestamp": Set each file's last-modified time to that of the
most recent block in that file.

50
contrib/linearize/linearize-data.py

@@ -10,11 +10,13 @@
import json import json
import struct import struct
import re import re
import os
import base64 import base64
import httplib import httplib
import sys import sys
import hashlib import hashlib
import datetime import datetime
import time
settings = {} settings = {}
@@ -60,9 +62,10 @@ def calc_hash_str(blk_hdr):
def get_blk_dt(blk_hdr): def get_blk_dt(blk_hdr):
members = struct.unpack("<I", blk_hdr[68:68+4]) members = struct.unpack("<I", blk_hdr[68:68+4])
dt = datetime.datetime.fromtimestamp(members[0]) nTime = members[0]
dt = datetime.datetime.fromtimestamp(nTime)
dt_ym = datetime.datetime(dt.year, dt.month, 1) dt_ym = datetime.datetime(dt.year, dt.month, 1)
return dt_ym return (dt_ym, nTime)
def get_block_hashes(settings): def get_block_hashes(settings):
blkindex = [] blkindex = []
@@ -87,14 +90,19 @@ def copydata(settings, blkindex, blkset):
outFn = 0 outFn = 0
outsz = 0 outsz = 0
outF = None outF = None
outFname = None
blkCount = 0 blkCount = 0
lastDate = datetime.datetime(2000, 1, 1) lastDate = datetime.datetime(2000, 1, 1)
highTS = 1408893517 - 315360000
timestampSplit = False timestampSplit = False
fileOutput = True fileOutput = True
setFileTime = False
maxOutSz = settings['max_out_sz'] maxOutSz = settings['max_out_sz']
if 'output' in settings: if 'output' in settings:
fileOutput = False fileOutput = False
if settings['file_timestamp'] != 0:
setFileTime = True
if settings['split_timestamp'] != 0: if settings['split_timestamp'] != 0:
timestampSplit = True timestampSplit = True
@@ -134,34 +142,41 @@ def copydata(settings, blkindex, blkset):
if not fileOutput and ((outsz + inLen) > maxOutSz): if not fileOutput and ((outsz + inLen) > maxOutSz):
outF.close() outF.close()
if setFileTime:
os.utime(outFname, (int(time.time()), highTS))
outF = None outF = None
outFname = None
outFn = outFn + 1 outFn = outFn + 1
outsz = 0 outsz = 0
if timestampSplit: (blkDate, blkTS) = get_blk_dt(blk_hdr)
blkDate = get_blk_dt(blk_hdr) if timestampSplit and (blkDate > lastDate):
if blkDate > lastDate: print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str) lastDate = blkDate
lastDate = blkDate if outF:
if outF: outF.close()
outF.close() if setFileTime:
outF = None os.utime(outFname, (int(time.time()), highTS))
outFn = outFn + 1 outF = None
outsz = 0 outFname = None
outFn = outFn + 1
outsz = 0
if not outF: if not outF:
if fileOutput: if fileOutput:
fname = settings['output_file'] outFname = settings['output_file']
else: else:
fname = "%s/blk%05d.dat" % (settings['output'], outFn) outFname = "%s/blk%05d.dat" % (settings['output'], outFn)
print("Output file" + fname) print("Output file" + outFname)
outF = open(fname, "wb") outF = open(outFname, "wb")
outF.write(inhdr) outF.write(inhdr)
outF.write(rawblock) outF.write(rawblock)
outsz = outsz + inLen + 8 outsz = outsz + inLen + 8
blkCount = blkCount + 1 blkCount = blkCount + 1
if blkTS > highTS:
highTS = blkTS
if (blkCount % 1000) == 0: if (blkCount % 1000) == 0:
print("Wrote " + str(blkCount) + " blocks") print("Wrote " + str(blkCount) + " blocks")
@@ -191,6 +206,8 @@ if __name__ == '__main__':
settings['input'] = 'input' settings['input'] = 'input'
if 'hashlist' not in settings: if 'hashlist' not in settings:
settings['hashlist'] = 'hashlist.txt' settings['hashlist'] = 'hashlist.txt'
if 'file_timestamp' not in settings:
settings['file_timestamp'] = 0
if 'split_timestamp' not in settings: if 'split_timestamp' not in settings:
settings['split_timestamp'] = 0 settings['split_timestamp'] = 0
if 'max_out_sz' not in settings: if 'max_out_sz' not in settings:
@@ -198,6 +215,7 @@ if __name__ == '__main__':
settings['max_out_sz'] = long(settings['max_out_sz']) settings['max_out_sz'] = long(settings['max_out_sz'])
settings['split_timestamp'] = int(settings['split_timestamp']) settings['split_timestamp'] = int(settings['split_timestamp'])
settings['file_timestamp'] = int(settings['file_timestamp'])
settings['netmagic'] = settings['netmagic'].decode('hex') settings['netmagic'] = settings['netmagic'].decode('hex')
if 'output_file' not in settings and 'output' not in settings: if 'output_file' not in settings and 'output' not in settings:

Loading…
Cancel
Save