Skip to content

Commit

Permalink
Merge bitcoin#9373: Linearize script update (hash byte reversal and P…
Browse files Browse the repository at this point in the history
…ython 3 support)

3c8f63b Make linearize scripts Python 3-compatible. (Doug)
d5aa198 Allow linearization scripts to support hash byte reversal (Doug)
  • Loading branch information
laanwj committed Jan 5, 2017
2 parents cfe41d7 + 3c8f63b commit 406f35d
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 37 deletions.
45 changes: 30 additions & 15 deletions contrib/linearize/README.md
Original file line number Diff line number Diff line change
@@ -1,33 +1,48 @@
# Linearize
Construct a linear, no-fork, best version of the blockchain.
Construct a linear, no-fork, best version of the Bitcoin blockchain. The scripts
run using Python 3 but are compatible with Python 2.

## Step 1: Download hash list

$ ./linearize-hashes.py linearize.cfg > hashlist.txt

Required configuration file settings for linearize-hashes:
* RPC: rpcuser, rpcpassword
* RPC: `rpcuser`, `rpcpassword`

Optional config file setting for linearize-hashes:
* RPC: host, port
* Block chain: min_height, max_height
* RPC: `host` (Default: `127.0.0.1`)
* RPC: `port` (Default: `8332`)
* Blockchain: `min_height`, `max_height`
* `rev_hash_bytes`: If true, the written block hash list will be
byte-reversed. (In other words, the hash returned by getblockhash will have its
bytes reversed.) False by default. Intended for generation of
standalone hash lists but safe to use with linearize-data.py, which will output
the same data no matter which byte format is chosen.

The `linearize-hashes` script requires a connection, local or remote, to a
JSON-RPC server. Running `bitcoind` or `bitcoin-qt -server` will be sufficient.

## Step 2: Copy local block data

$ ./linearize-data.py linearize.cfg

Required configuration file settings:
* "input": bitcoind blocks/ directory containing blkNNNNN.dat
* "hashlist": text file containing list of block hashes, linearized-hashes.py
output.
* "output_file": bootstrap.dat
* `output_file`: The file that will contain the final blockchain.
or
* "output": output directory for linearized blocks/blkNNNNN.dat output
* `output`: Output directory for linearized `blocks/blkNNNNN.dat` output.

Optional config file setting for linearize-data:
* "netmagic": network magic number
* "max_out_sz": maximum output file size (default `1000*1000*1000`)
* "split_timestamp": Split files when a new month is first seen, in addition to
reaching a maximum file size.
* "file_timestamp": Set each file's last-modified time to that of the
most recent block in that file.
* `file_timestamp`: Set each file's last-modified time to that of the most
recent block in that file.
* `genesis`: The hash of the genesis block in the blockchain.
* `input`: bitcoind blocks/ directory containing blkNNNNN.dat
* `hashlist`: text file containing list of block hashes created by
linearize-hashes.py.
* `max_out_sz`: Maximum size for files created by the `output_file` option.
(Default: `1000*1000*1000 bytes`)
* `netmagic`: Network magic number.
* `rev_hash_bytes`: If true, the block hash list written by linearize-hashes.py
will be byte-reversed when read by linearize-data.py. See the linearize-hashes
entry for more information.
* `split_timestamp`: Split blockchain files when a new month is first seen, in
addition to reaching a maximum file size (`max_out_sz`).
4 changes: 3 additions & 1 deletion contrib/linearize/example-linearize.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ input=/home/example/.bitcoin/blocks

output_file=/home/example/Downloads/bootstrap.dat
hashlist=hashlist.txt
split_year=1

# Maxmimum size in bytes of out-of-order blocks cache in memory
out_of_order_cache_sz = 100000000

# Do we want the reverse the hash bytes coming from getblockhash?
rev_hash_bytes = False
43 changes: 31 additions & 12 deletions contrib/linearize/linearize-data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
#
# linearize-data.py: Construct a linear, no-fork version of the chain.
#
Expand All @@ -8,23 +8,33 @@
#

from __future__ import print_function, division
try: # Python 3
import http.client as httplib
except ImportError: # Python 2
import httplib
import json
import struct
import re
import os
import os.path
import base64
import httplib
import sys
import hashlib
import datetime
import time
from collections import namedtuple
from binascii import hexlify, unhexlify

settings = {}

##### Switch endian-ness #####
def hex_switchEndian(s):
""" Switches the endianness of a hex string (in pairs of hex chars) """
pairList = [s[i:i+2].encode() for i in range(0, len(s), 2)]
return b''.join(pairList[::-1]).decode()

def uint32(x):
return x & 0xffffffffL
return x & 0xffffffff

def bytereverse(x):
return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) |
Expand All @@ -35,14 +45,14 @@ def bufreverse(in_buf):
for i in range(0, len(in_buf), 4):
word = struct.unpack('@I', in_buf[i:i+4])[0]
out_words.append(struct.pack('@I', bytereverse(word)))
return ''.join(out_words)
return b''.join(out_words)

def wordreverse(in_buf):
out_words = []
for i in range(0, len(in_buf), 4):
out_words.append(in_buf[i:i+4])
out_words.reverse()
return ''.join(out_words)
return b''.join(out_words)

def calc_hdr_hash(blk_hdr):
hash1 = hashlib.sha256()
Expand All @@ -59,7 +69,7 @@ def calc_hash_str(blk_hdr):
hash = calc_hdr_hash(blk_hdr)
hash = bufreverse(hash)
hash = wordreverse(hash)
hash_str = hash.encode('hex')
hash_str = hexlify(hash).decode('utf-8')
return hash_str

def get_blk_dt(blk_hdr):
Expand All @@ -69,17 +79,21 @@ def get_blk_dt(blk_hdr):
dt_ym = datetime.datetime(dt.year, dt.month, 1)
return (dt_ym, nTime)

# When getting the list of block hashes, undo any byte reversals.
def get_block_hashes(settings):
blkindex = []
f = open(settings['hashlist'], "r")
for line in f:
line = line.rstrip()
if settings['rev_hash_bytes'] == 'true':
line = hex_switchEndian(line)
blkindex.append(line)

print("Read " + str(len(blkindex)) + " hashes")

return blkindex

# The block map shouldn't give or receive byte-reversed hashes.
def mkblockmap(blkindex):
blkmap = {}
for height,hash in enumerate(blkindex):
Expand Down Expand Up @@ -207,7 +221,7 @@ def run(self):

inMagic = inhdr[:4]
if (inMagic != self.settings['netmagic']):
print("Invalid magic: " + inMagic.encode('hex'))
print("Invalid magic: " + hexlify(inMagic).decode('utf-8'))
return
inLenLE = inhdr[4:]
su = struct.unpack("<I", inLenLE)
Expand Down Expand Up @@ -265,6 +279,12 @@ def run(self):
settings[m.group(1)] = m.group(2)
f.close()

# Force hash byte format setting to be lowercase to make comparisons easier.
# Also place upfront in case any settings need to know about it.
if 'rev_hash_bytes' not in settings:
settings['rev_hash_bytes'] = 'false'
settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()

if 'netmagic' not in settings:
settings['netmagic'] = 'f9beb4d9'
if 'genesis' not in settings:
Expand All @@ -278,14 +298,14 @@ def run(self):
if 'split_timestamp' not in settings:
settings['split_timestamp'] = 0
if 'max_out_sz' not in settings:
settings['max_out_sz'] = 1000L * 1000 * 1000
settings['max_out_sz'] = 1000 * 1000 * 1000
if 'out_of_order_cache_sz' not in settings:
settings['out_of_order_cache_sz'] = 100 * 1000 * 1000

settings['max_out_sz'] = long(settings['max_out_sz'])
settings['max_out_sz'] = int(settings['max_out_sz'])
settings['split_timestamp'] = int(settings['split_timestamp'])
settings['file_timestamp'] = int(settings['file_timestamp'])
settings['netmagic'] = settings['netmagic'].decode('hex')
settings['netmagic'] = unhexlify(settings['netmagic'].encode('utf-8'))
settings['out_of_order_cache_sz'] = int(settings['out_of_order_cache_sz'])

if 'output_file' not in settings and 'output' not in settings:
Expand All @@ -295,9 +315,8 @@ def run(self):
blkindex = get_block_hashes(settings)
blkmap = mkblockmap(blkindex)

# Block hash map won't be byte-reversed. Neither should the genesis hash.
if not settings['genesis'] in blkmap:
print("Genesis block not found in hashlist")
else:
BlockDataCopier(settings, blkindex, blkmap).run()


42 changes: 33 additions & 9 deletions contrib/linearize/linearize-hashes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
#
# linearize-hashes.py: List blocks in a linear, no-fork version of the chain.
#
Expand All @@ -8,32 +8,47 @@
#

from __future__ import print_function
try: # Python 3
import http.client as httplib
except ImportError: # Python 2
import httplib
import json
import struct
import re
import base64
import httplib
import sys

settings = {}

##### Switch endian-ness #####
def hex_switchEndian(s):
""" Switches the endianness of a hex string (in pairs of hex chars) """
pairList = [s[i:i+2].encode() for i in range(0, len(s), 2)]
return b''.join(pairList[::-1]).decode()

class BitcoinRPC:
def __init__(self, host, port, username, password):
authpair = "%s:%s" % (username, password)
self.authhdr = "Basic %s" % (base64.b64encode(authpair))
self.conn = httplib.HTTPConnection(host, port, False, 30)
authpair = authpair.encode('utf-8')
self.authhdr = b"Basic " + base64.b64encode(authpair)
self.conn = httplib.HTTPConnection(host, port=port, timeout=30)

def execute(self, obj):
self.conn.request('POST', '/', json.dumps(obj),
{ 'Authorization' : self.authhdr,
'Content-type' : 'application/json' })
try:
self.conn.request('POST', '/', json.dumps(obj),
{ 'Authorization' : self.authhdr,
'Content-type' : 'application/json' })
except ConnectionRefusedError:
print('RPC connection refused. Check RPC settings and the server status.',
file=sys.stderr)
return None

resp = self.conn.getresponse()
if resp is None:
print("JSON-RPC: no response", file=sys.stderr)
return None

body = resp.read()
body = resp.read().decode('utf-8')
resp_obj = json.loads(body)
return resp_obj

Expand Down Expand Up @@ -64,12 +79,17 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
batch.append(rpc.build_request(x, 'getblockhash', [height + x]))

reply = rpc.execute(batch)
if reply is None:
print('Cannot continue. Program will halt.')
return None

for x,resp_obj in enumerate(reply):
if rpc.response_is_error(resp_obj):
print('JSON-RPC: error at height', height+x, ': ', resp_obj['error'], file=sys.stderr)
exit(1)
assert(resp_obj['id'] == x) # assume replies are in-sequence
if settings['rev_hash_bytes'] == 'true':
resp_obj['result'] = hex_switchEndian(resp_obj['result'])
print(resp_obj['result'])

height += num_blocks
Expand Down Expand Up @@ -101,6 +121,8 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
settings['min_height'] = 0
if 'max_height' not in settings:
settings['max_height'] = 313000
if 'rev_hash_bytes' not in settings:
settings['rev_hash_bytes'] = 'false'
if 'rpcuser' not in settings or 'rpcpassword' not in settings:
print("Missing username and/or password in cfg file", file=stderr)
sys.exit(1)
Expand All @@ -109,5 +131,7 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
settings['min_height'] = int(settings['min_height'])
settings['max_height'] = int(settings['max_height'])

get_block_hashes(settings)
# Force hash byte format setting to be lowercase to make comparisons easier.
settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()

get_block_hashes(settings)

0 comments on commit 406f35d

Please sign in to comment.