From 48a28be90b2611d77caa70a324111586cd082fb8 Mon Sep 17 00:00:00 2001
From: Balakrishnan
Date: Wed, 9 Dec 2020 12:09:49 +0530
Subject: [PATCH] Add support to convert RDB to CSV file

How to run :
```
rdb --command csv /var/redis/6379/dump.rdb
```
---
 README.md             |  10 +++-
 rdbtools/__init__.py  |   4 +-
 rdbtools/callbacks.py | 144 ++++++++++++++++++++++++++++++++++++++++++
 rdbtools/cli/rdb.py   |   3 +-
 4 files changed, 157 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index a390baa..d46ec71 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ To install from source :
 # Command line usage examples
 
 Every run of RDB Tool requires to specify a command to indicate what should be done with the parsed RDB data.
-Valid commands are: json, diff, justkeys, justkeyvals and protocol.
+Valid commands are: json, csv, diff, justkeys, justkeyvals and protocol.
 
 JSON from a two database dump:
 
@@ -48,6 +48,14 @@ JSON from a two database dump:
     "armadillo":["chacoan naked-tailed","giant","Andean hairy","nine-banded","pink fairy"],
     "aroma":{"pungent":"vinegar","putrid":"rotten eggs","floral":"roses"}}]
 
+CSV from a two database dump (values are hex-encoded, output abridged):
+
+    > rdb --command csv /var/redis/6379/dump.rdb
+
+    user001:{fname:52616f756c,sname:44756b65}
+    user002:{fname:476f6e7a6f,sname:4472}
+    user_list:[75736572303033,75736572303032,75736572303031]
+
 ## Filter parsed output
 
 Only process keys that match the regex, and only print key and values:
diff --git a/rdbtools/__init__.py b/rdbtools/__init__.py
index 6f6d9ee..bb4b0da 100644
--- a/rdbtools/__init__.py
+++ b/rdbtools/__init__.py
@@ -1,10 +1,10 @@
 from rdbtools.parser import RdbCallback, RdbParser, DebugCallback
-from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback, KeyValsOnlyCallback, KeysOnlyCallback
+from rdbtools.callbacks import JSONCallback, CSVCallback, DiffCallback, ProtocolCallback, KeyValsOnlyCallback, KeysOnlyCallback
 from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator, PrintJustKeys
 
 __version__ = '0.1.15'
 VERSION = tuple(map(int, __version__.split('.')))
 
 __all__ = [
-    'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'KeyValsOnlyCallback', 'KeysOnlyCallback', 'PrintJustKeys']
+    'RdbParser', 'RdbCallback', 'JSONCallback', 'CSVCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'KeyValsOnlyCallback', 'KeysOnlyCallback', 'PrintJustKeys']
 
diff --git a/rdbtools/callbacks.py b/rdbtools/callbacks.py
index 566217b..d4bf288 100644
--- a/rdbtools/callbacks.py
+++ b/rdbtools/callbacks.py
@@ -134,6 +134,150 @@ def end_module(self, key, buffer_size, buffer=None):
         self._end_key(key)
         self._out.write(b'}')
 
+
+class CSVCallback(RdbCallback):
+    """Write each key of the dump as one CRLF-separated record.
+
+    Strings are written as ``"key","0x<hex>"``; hashes and sorted sets as
+    ``key:{field:<hex>,...}``; sets and lists as ``key:[<hex>,...]``.
+    Values are hex-encoded so binary data cannot break a record.
+
+    NOTE(review): only string records are quoted; the container records
+    use a JSON-like layout and are not strict RFC 4180 CSV.
+    """
+
+    def __init__(self, out, string_escape=None):
+        """Write records to ``out``, a binary file-like object.
+
+        ``string_escape`` is passed to ``RdbCallback``; ``None`` selects
+        ``encodehelpers.STRING_ESCAPE_UTF8``.
+        """
+        if string_escape is None:
+            string_escape = encodehelpers.STRING_ESCAPE_UTF8
+        super(CSVCallback, self).__init__(string_escape)
+        self._out = out
+        # Spans the whole dump: records of different databases must not
+        # end up on the same line.
+        self._is_first_key = True
+        self._elements_in_key = 0
+        self._element_index = 0
+
+    def encode_key(self, key):
+        """Return ``key`` escaped for display and UTF-8 encoded."""
+        key = encodehelpers.bytes_to_unicode(key, self._escape, skip_printable=True)
+        return codecs.encode(key, 'utf-8')
+
+    def encode_value(self, val):
+        """Return ``val`` as hexadecimal digits (bytes)."""
+        if not isinstance(val, bytes):
+            # Non-bytes values (e.g. numeric sorted set scores) have no hex
+            # form of their own, so encode their text representation.
+            val = codecs.encode(str(val), 'utf-8')
+        # The hex codec already yields bytes; encoding that result to UTF-8
+        # again raises TypeError on Python 3.
+        return codecs.encode(val, 'hex')
+
+    def start_rdb(self):
+        pass
+
+    def start_database(self, db_number):
+        pass
+
+    def end_database(self, db_number):
+        pass
+
+    def end_rdb(self):
+        pass
+
+    def _start_key(self, key, length):
+        """Begin a record for ``key`` holding ``length`` elements."""
+        if not self._is_first_key:
+            self._out.write(b'\r\n')
+        self._is_first_key = False
+        self._elements_in_key = length
+        self._element_index = 0
+
+    def _end_key(self, key):
+        pass
+
+    def _write_comma(self):
+        """Separate the current element from the previous one."""
+        if 0 < self._element_index < self._elements_in_key:
+            self._out.write(b',')
+        self._element_index += 1
+
+    def _start_container(self, key, length, opener):
+        """Begin a record and write ``key:`` plus the opening bracket."""
+        self._start_key(key, length)
+        self._out.write(self.encode_key(key) + b':' + opener)
+
+    def _end_container(self, key, closer):
+        """Finish a record by writing its closing bracket."""
+        self._end_key(key)
+        self._out.write(closer)
+
+    def set(self, key, value, expiry, info):
+        self._start_key(key, 0)
+        # Double embedded quotes so the quoted key field stays parseable.
+        quoted_key = self.encode_key(key).replace(b'"', b'""')
+        self._out.write(b'"' + quoted_key + b'","0x' + self.encode_value(value) + b'"')
+        self._end_key(key)
+
+    def start_hash(self, key, length, expiry, info):
+        self._start_container(key, length, b'{')
+
+    def hset(self, key, field, value):
+        self._write_comma()
+        self._out.write(self.encode_key(field) + b':' + self.encode_value(value))
+
+    def end_hash(self, key):
+        self._end_container(key, b'}')
+
+    def start_set(self, key, cardinality, expiry, info):
+        self._start_container(key, cardinality, b'[')
+
+    def sadd(self, key, member):
+        self._write_comma()
+        self._out.write(self.encode_value(member))
+
+    def end_set(self, key):
+        self._end_container(key, b']')
+
+    def start_list(self, key, expiry, info):
+        self._start_container(key, 0, b'[')
+
+    def rpush(self, key, value):
+        # The list length is not announced up front, so grow it per item.
+        self._elements_in_key += 1
+        self._write_comma()
+        self._out.write(self.encode_value(value))
+
+    def end_list(self, key, info):
+        self._end_container(key, b']')
+
+    def start_sorted_set(self, key, length, expiry, info):
+        self._start_container(key, length, b'{')
+
+    def zadd(self, key, score, member):
+        self._write_comma()
+        self._out.write(self.encode_key(member) + b':' + self.encode_value(score))
+
+    def end_sorted_set(self, key):
+        self._end_container(key, b'}')
+
+    def start_stream(self, key, listpacks_count, expiry, info):
+        self._start_container(key, 0, b'{')
+
+    def end_stream(self, key, items, last_entry_id, cgroups):
+        self._end_container(key, b'}')
+
+    def start_module(self, key, module_name, expiry, info):
+        self._start_container(key, 0, b'{')
+        return False
+
+    def end_module(self, key, buffer_size, buffer=None):
+        self._end_container(key, b'}')
+
 
 class KeysOnlyCallback(RdbCallback):
     def __init__(self, out, string_escape=None):
diff --git a/rdbtools/cli/rdb.py b/rdbtools/cli/rdb.py
index 1cbcf4b..3699e2c 100755
--- a/rdbtools/cli/rdb.py
+++ b/rdbtools/cli/rdb.py
@@ -3,7 +3,7 @@
 import os
 import sys
 from argparse import ArgumentParser
-from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, KeysOnlyCallback, KeyValsOnlyCallback
+from rdbtools import RdbParser, JSONCallback, CSVCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, KeysOnlyCallback, KeyValsOnlyCallback
 from rdbtools.encodehelpers import ESCAPE_CHOICES
 from rdbtools.parser import HAS_PYTHON_LZF as PYTHON_LZF_INSTALLED
 
@@ -82,6 +82,7 @@ def main():
             callback = {
                 'diff': lambda f: DiffCallback(f, string_escape=options.escape),
                 'json': lambda f: JSONCallback(f, string_escape=options.escape),
+                'csv': lambda f: CSVCallback(f, string_escape=options.escape),
                 'justkeys': lambda f: KeysOnlyCallback(f, string_escape=options.escape),
                 'justkeyvals': lambda f: KeyValsOnlyCallback(f, string_escape=options.escape),
                 'memory': lambda f: MemoryCallback(PrintAllKeys(f, options.bytes, options.largest),