Skip to content

Commit

Permalink
Merge pull request #110 from chrisguida/undo-blocks
Browse files Browse the repository at this point in the history
Undo blocks
  • Loading branch information
alecalve authored Mar 23, 2024
2 parents 82ca8c0 + 173dfaf commit 24fac54
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 20 deletions.
6 changes: 3 additions & 3 deletions blockchain_parser/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from .transaction import Transaction
from .block_header import BlockHeader
from .utils import format_hash, decode_varint, double_sha256
from .utils import format_hash, decode_compactsize, double_sha256


def get_block_transactions(raw_hex):
Expand All @@ -23,7 +23,7 @@ def get_block_transactions(raw_hex):

# Decoding the number of transactions, offset is the size of
# the varint (1 to 9 bytes)
n_transactions, offset = decode_varint(transaction_data)
n_transactions, offset = decode_compactsize(transaction_data)

for i in range(n_transactions):
# Try from 1024 (1KiB) -> 1073741824 (1GiB) slice widths
Expand Down Expand Up @@ -78,7 +78,7 @@ def n_transactions(self):
as there's no need to parse all transactions to get this information
"""
if self._n_transactions is None:
self._n_transactions = decode_varint(self.hex[80:])[0]
self._n_transactions = decode_compactsize(self.hex[80:])[0]

return self._n_transactions

Expand Down
14 changes: 13 additions & 1 deletion blockchain_parser/blockchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,22 @@ def get_files(path):
files = map(lambda x: os.path.join(path, x), files)
return sorted(files)

def get_undo_files(path):
    """
    Returns the sorted list of undo files (rev*.dat) located in path

    When path is not a directory, it is returned unchanged as the single
    element of the list
    """
    mode = os.stat(path)[stat.ST_MODE]
    if not stat.S_ISDIR(mode):
        return [path]

    return sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.startswith("rev") and name.endswith(".dat")
    )


def get_blocks(blockfile):
"""
Given the name of a .blk file, for every block contained in the file,
Given the name of a .dat file, for every block contained in the file,
yields its raw hexadecimal value
"""
with open(blockfile, "rb") as f:
Expand Down
4 changes: 2 additions & 2 deletions blockchain_parser/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# modified, propagated, or distributed except according to the terms contained
# in the LICENSE file.

from .utils import decode_varint, decode_uint32, format_hash
from .utils import decode_compactsize, decode_uint32, format_hash
from .script import Script


Expand All @@ -23,7 +23,7 @@ def __init__(self, raw_hex):
self._sequence_number = None
self._witnesses = []

self._script_length, varint_length = decode_varint(raw_hex[36:])
self._script_length, varint_length = decode_compactsize(raw_hex[36:])
self._script_start = 36 + varint_length

self.size = self._script_start + self._script_length + 4
Expand Down
4 changes: 2 additions & 2 deletions blockchain_parser/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# modified, propagated, or distributed except according to the terms contained
# in the LICENSE file.

from .utils import decode_varint, decode_uint64
from .utils import decode_compactsize, decode_uint64
from .script import Script
from .address import Address

Expand All @@ -22,7 +22,7 @@ def __init__(self, raw_hex):
self._script = None
self._addresses = None

script_length, varint_size = decode_varint(raw_hex[8:])
script_length, varint_size = decode_compactsize(raw_hex[8:])
script_start = 8 + varint_size

self._script_hex = raw_hex[script_start:script_start+script_length]
Expand Down
10 changes: 5 additions & 5 deletions blockchain_parser/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ def test_decode_uint64(self):
for uint64, value in uint64_dict.items():
self.assertEqual(utils.decode_uint64(a2b_hex(uint64)), value)

def test_decode_varint(self):
def test_decode_compactsize(self):
case1 = a2b_hex("fa")
self.assertEqual(utils.decode_varint(case1), (250, 1))
self.assertEqual(utils.decode_compactsize(case1), (250, 1))
case2 = a2b_hex("fd0100")
self.assertEqual(utils.decode_varint(case2), (1, 3))
self.assertEqual(utils.decode_compactsize(case2), (1, 3))
case3 = a2b_hex("fe01000000")
self.assertEqual(utils.decode_varint(case3), (1, 5))
self.assertEqual(utils.decode_compactsize(case3), (1, 5))
case4 = a2b_hex("ff0100000000000000")
self.assertEqual(utils.decode_varint(case4), (1, 9))
self.assertEqual(utils.decode_compactsize(case4), (1, 9))
10 changes: 5 additions & 5 deletions blockchain_parser/transaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from math import ceil

from .utils import decode_varint, decode_uint32, double_sha256, format_hash
from .utils import decode_compactsize, decode_uint32, double_sha256, format_hash
from .input import Input
from .output import Output

Expand Down Expand Up @@ -44,7 +44,7 @@ def __init__(self, raw_hex):
self.is_segwit = True
offset += 2

self.n_inputs, varint_size = decode_varint(raw_hex[offset:])
self.n_inputs, varint_size = decode_compactsize(raw_hex[offset:])
offset += varint_size

self.inputs = []
Expand All @@ -53,7 +53,7 @@ def __init__(self, raw_hex):
offset += input.size
self.inputs.append(input)

self.n_outputs, varint_size = decode_varint(raw_hex[offset:])
self.n_outputs, varint_size = decode_compactsize(raw_hex[offset:])
offset += varint_size

self.outputs = []
Expand All @@ -65,10 +65,10 @@ def __init__(self, raw_hex):
if self.is_segwit:
self._offset_before_tx_witnesses = offset
for inp in self.inputs:
tx_witnesses_n, varint_size = decode_varint(raw_hex[offset:])
tx_witnesses_n, varint_size = decode_compactsize(raw_hex[offset:])
offset += varint_size
for j in range(tx_witnesses_n):
component_length, varint_size = decode_varint(
component_length, varint_size = decode_compactsize(
raw_hex[offset:])
offset += varint_size
witness = raw_hex[offset:offset + component_length]
Expand Down
180 changes: 180 additions & 0 deletions blockchain_parser/undo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Copyright (C) 2015-2020 The bitcoin-blockchain-parser developers
#
# This file is part of bitcoin-blockchain-parser.
#
# It is subject to the license terms in the LICENSE file found in the top-level
# directory of this distribution.
#
# No part of bitcoin-blockchain-parser, including this file, may be copied,
# modified, propagated, or distributed except according to the terms contained
# in the LICENSE file.

from .utils import decode_varint, decode_compactsize, decompress_txout_amt

# Field prime of the secp256k1 curve (y^2 = x^3 + 7 over F_p)
_SECP256K1_P = 2**256 - 2**32 - 977

# Number of special script types (0x00 to 0x05) known to the compressor, the
# size code of any other script is its length plus this value
_NUM_SPECIAL_SCRIPTS = 6


def _uncompress_pubkey(parity, x_bytes):
    """
    Returns the 65 bytes uncompressed public key (0x04 || X || Y) whose
    X coordinate is x_bytes and whose Y coordinate has the given parity
    (0 for even, 1 for odd)
    """
    x = int.from_bytes(x_bytes, "big")
    y_squared = (pow(x, 3, _SECP256K1_P) + 7) % _SECP256K1_P
    # p % 4 == 3 so, when a square root exists, it is y_squared^((p + 1) / 4)
    y = pow(y_squared, (_SECP256K1_P + 1) // 4, _SECP256K1_P)
    if x >= _SECP256K1_P or (y * y) % _SECP256K1_P != y_squared:
        raise ValueError("Compressed public key is not on the curve")
    if y % 2 != parity:
        y = _SECP256K1_P - y
    return b"\x04" + x_bytes + y.to_bytes(32, "big")


def decompress_script(raw_hex):
    """
    Takes a compressed CScript as stored in the undo files and returns it
    in uncompressed form
    (de)compression scheme is defined in bitcoin/src/compressor.cpp

    :param raw_hex: the compressed script, starting with its type byte
        (special scripts, types 0 to 5) or with its varint size code
        (any other script)
    :type raw_hex: bytes
    :return: the raw bytes of the decompressed script
    :rtype: bytes
    :raises ValueError: if the data is empty, has the wrong size for its
        type or holds a public key which is not on the curve
    """
    if not raw_hex:
        raise ValueError("Compressed script is empty")

    script_type = raw_hex[0]
    payload = raw_hex[1:]

    if script_type in (0x00, 0x01):
        if len(payload) != 20:
            raise ValueError("Compressed script has wrong size")
        if script_type == 0x00:
            # P2PKH: OP_DUP OP_HASH160 <20 bytes> OP_EQUALVERIFY OP_CHECKSIG
            return b"\x76\xa9\x14" + payload + b"\x88\xac"
        # P2SH: OP_HASH160 <20 bytes> OP_EQUAL
        return b"\xa9\x14" + payload + b"\x87"

    if script_type in (0x02, 0x03, 0x04, 0x05):
        if len(payload) != 32:
            raise ValueError("Compressed script has wrong size")
        if script_type in (0x02, 0x03):
            # P2PK with a compressed key: the type byte is the key prefix
            return b"\x21" + raw_hex[:33] + b"\xac"
        # P2PK with an uncompressed key, stored as its X coordinate only:
        # types 4 and 5 stand for the key prefixes 0x02 (even Y) and
        # 0x03 (odd Y)
        return b"\x41" + _uncompress_pubkey(script_type - 4, payload) + b"\xac"

    # Any other script is stored as is, prefixed by a varint holding its
    # length plus the number of special script types
    size_code, size_code_len = decode_varint(raw_hex)
    script = raw_hex[size_code_len:]
    if len(script) != size_code - _NUM_SPECIAL_SCRIPTS:
        raise ValueError("Compressed script has wrong size")
    return script


class BlockUndo(object):
    """
    Undo data of a block, as encoded in the undo rev*.dat files: one
    SpentTransaction (list of spent coins) per recorded transaction
    """

    def __init__(self, raw_hex):
        self._raw_hex = raw_hex
        self.spends = []

        # The record opens with the number of transactions as a compactsize
        tx_count, cursor = decode_compactsize(raw_hex)
        for _ in range(tx_count):
            spent_tx = SpentTransaction(raw_hex=raw_hex[cursor:])
            self.spends.append(spent_tx)
            # Each SpentTransaction reports how many bytes it has consumed
            cursor += spent_tx.len


class SpentTransaction(object):
    """
    Represents the outputs spent by a single transaction, as listed in
    the undo data of its block
    """

    def __init__(self, raw_hex=None):
        self._raw_hex = raw_hex
        self.outputs = []

        # Number of spent outputs, followed by the outputs themselves
        self.output_len, cursor = decode_compactsize(raw_hex)
        for _ in range(self.output_len):
            spent = SpentOutput(raw_hex=raw_hex[cursor:])
            self.outputs.append(spent)
            cursor += spent.len

        # Total number of bytes of raw_hex used by this transaction
        self.len = cursor

    @classmethod
    def from_hex(cls, hex_):
        """Builds a SpentTransaction from its raw serialized form"""
        return cls(hex_)


class SpentOutput(object):
    """
    Represents a spent Transaction output, as serialized in the undo data

    The constructor sets is_coinbase, height, amt (decompressed amount),
    script_pub_key_compressed (SpentScriptPubKey) and len (number of bytes
    of raw_hex used by this output)
    """

    def __init__(self, raw_hex=None):
        self._raw_hex = raw_hex
        # Decompressed script, computed on first access of the script property
        self._script = None

        # The height code is a varint holding height * 2 + coinbase flag
        height_code, pos = decode_varint(raw_hex)
        self.is_coinbase = height_code % 2 == 1
        self.height = height_code // 2

        # A legacy transaction version follows, as a varint, only when the
        # height is not zero (see TxInUndoFormatter in bitcoin/src/undo.h).
        # Recent versions always write 0x00 there, it is kept only for
        # backwards compatibility and is skipped
        if self.height > 0:
            _, version_len = decode_varint(raw_hex[pos:])
            pos += version_len

        # decode compressed txout amount
        compressed_amt, compressed_amt_len = decode_varint(raw_hex[pos:])
        self.amt = decompress_txout_amt(compressed_amt)
        pos += compressed_amt_len

        # get script
        script_hex, _ = SpentScriptPubKey.extract_from_hex(raw_hex[pos:])
        self.script_pub_key_compressed = SpentScriptPubKey(script_hex)
        self.len = pos + self.script_pub_key_compressed.len

    @classmethod
    def from_hex(cls, hex_):
        """Builds a SpentOutput from its raw serialized form"""
        return cls(hex_)

    @property
    def script(self):
        """Returns the decompressed script of this output"""
        # Cached in a private attribute: reading self.script here would
        # call this property again and recurse forever
        if self._script is None:
            self._script = decompress_script(
                self.script_pub_key_compressed._raw_hex)
        return self._script



class SpentScriptPubKey(object):
    """
    Represents the compressed script of a spent Transaction output,
    including its leading type byte or size code
    """

    def __init__(self, raw_hex=None):
        self._raw_hex = raw_hex
        # Decompressed script, computed on first access of the script property
        self._script = None
        self.len = len(raw_hex)

    @classmethod
    def from_hex(cls, hex_):
        """Builds a SpentScriptPubKey from its raw compressed form"""
        return cls(hex_)

    @classmethod
    def extract_from_hex(cls, raw_hex):
        """
        Cuts the compressed script located at the start of raw_hex

        Returns a tuple whose first element is the compressed script,
        including its type byte or size code. The second element is 21 or
        33 (full length, type byte included) for the special script types
        0 to 5 and the length of the script without its size code otherwise
        """
        script_type = raw_hex[0]
        if script_type in (0x00, 0x01):
            # type byte followed by a 20 bytes hash
            return (raw_hex[:21], 21)
        if script_type in (0x02, 0x03, 0x04, 0x05):
            # type byte followed by the 32 bytes X coordinate of a public key
            return (raw_hex[:33], 33)

        # Any other script is stored as is after a varint size code worth
        # its length plus 6, the number of special script types
        script_len_code, script_len_code_len = decode_varint(raw_hex)
        real_script_len = script_len_code - 6
        return (raw_hex[:script_len_code_len + real_script_len],
                real_script_len)

    @property
    def script(self):
        """Returns the decompressed script"""
        # Cached in a private attribute: reading self.script here would
        # call this property again and recurse forever
        if self._script is None:
            self._script = decompress_script(self._raw_hex)
        return self._script
Loading

0 comments on commit 24fac54

Please sign in to comment.