Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to convert RDB to CSV file #172

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ To install from source :
# Command line usage examples

Every run of RDB Tool requires you to specify a command indicating what should be done with the parsed RDB data.
Valid commands are: json, diff, justkeys, justkeyvals and protocol.
Valid commands are: json, csv, diff, justkeys, justkeyvals and protocol.

JSON from a two database dump:

Expand All @@ -48,6 +48,18 @@ JSON from a two database dump:
"armadillo":["chacoan naked-tailed","giant","Andean hairy","nine-banded","pink fairy"],
"aroma":{"pungent":"vinegar","putrid":"rotten eggs","floral":"roses"}}]

CSV from a two database dump:

> rdb --command csv /var/redis/6379/dump.rdb

database,type,key,size_in_bytes,encoding,num_elements,len_largest_element
0,list,lizards,241,quicklist,5,19
0,list,user_list,190,quicklist,3,7
2,hash,baloon,138,ziplist,3,11
2,list,armadillo,231,quicklist,5,20
2,hash,aroma,129,ziplist,3,11


## Filter parsed output

Only process keys that match the regex, and only print key and values:
Expand Down
4 changes: 2 additions & 2 deletions rdbtools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from rdbtools.parser import RdbCallback, RdbParser, DebugCallback
from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback, KeyValsOnlyCallback, KeysOnlyCallback
from rdbtools.callbacks import JSONCallback, CSVCallback, DiffCallback, ProtocolCallback, KeyValsOnlyCallback, KeysOnlyCallback
from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator, PrintJustKeys

# Release identifier as a dotted string; VERSION exposes the same release
# as a tuple of ints so it can be compared numerically.
__version__ = '0.1.15'
VERSION = tuple(int(part) for part in __version__.split('.'))

__all__ = [
'RdbParser', 'RdbCallback', 'JSONCallback', 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'KeyValsOnlyCallback', 'KeysOnlyCallback', 'PrintJustKeys']
'RdbParser', 'RdbCallback', 'JSONCallback', "CSVCallback", 'DiffCallback', 'MemoryCallback', 'ProtocolCallback', 'KeyValsOnlyCallback', 'KeysOnlyCallback', 'PrintJustKeys']

118 changes: 118 additions & 0 deletions rdbtools/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,124 @@ def end_module(self, key, buffer_size, buffer=None):
self._end_key(key)
self._out.write(b'}')

class CSVCallback(RdbCallback):
    """Parser callback that writes one CRLF-separated row per key to ``out``.

    String keys are written as ``"key","0x<hex value>"``.  Container keys
    are written as ``key:{...}`` (hashes, sorted sets, streams, modules) or
    ``key:[...]`` (sets, lists), with comma-separated members whose values
    are hex encoded.

    All writes are byte strings, so ``out`` must accept bytes.
    """

    def __init__(self, out, string_escape=None):
        """Create the callback.

        :param out: writable object receiving the encoded bytes.
        :param string_escape: escape mode handed to ``RdbCallback``;
            defaults to ``encodehelpers.STRING_ESCAPE_UTF8``.
        """
        if string_escape is None:
            string_escape = encodehelpers.STRING_ESCAPE_UTF8
        super(CSVCallback, self).__init__(string_escape)
        self._out = out
        self._is_first_db = True
        self._has_databases = False
        self._is_first_key_in_db = True
        # True once any row has been emitted, across all databases; this is
        # what decides whether a row separator is needed.
        self._has_rows = False
        self._elements_in_key = 0
        self._element_index = 0

    def encode_key(self, key):
        """Return ``key`` escaped via ``encodehelpers`` and UTF-8 encoded."""
        key = encodehelpers.bytes_to_unicode(key, self._escape, skip_printable=True)
        return codecs.encode(key, 'utf-8')

    def encode_value(self, val):
        """Return ``val`` as a hex-digit byte string.

        Non-bytes values are first converted to their textual form; ``zadd``
        passes the sorted-set score here, which is presumably numeric
        (TODO confirm against the parser).
        """
        if not isinstance(val, bytes):
            # repr() keeps full float precision on every Python version.
            text = repr(val) if isinstance(val, float) else str(val)
            val = codecs.encode(text, 'utf-8')
        # The hex codec already yields a byte string.  Re-encoding that
        # result as UTF-8 raises TypeError on Python 3, so it is returned
        # as is.
        return codecs.encode(val, 'hex')

    def start_rdb(self):
        pass

    def start_database(self, db_number):
        self._is_first_db = False
        self._has_databases = True
        self._is_first_key_in_db = True

    def end_database(self, db_number):
        pass

    def end_rdb(self):
        pass

    def _start_key(self, key, length):
        """Begin a new row and reset the per-key element counters."""
        # Separate rows on "any row written so far" rather than "first key
        # in this database": nothing is emitted at database boundaries, so
        # the per-database flag would glue the first key of a database onto
        # the last row of the previous one.
        if self._has_rows:
            self._out.write(b'\r\n')
        self._has_rows = True
        self._is_first_key_in_db = False
        self._elements_in_key = length
        self._element_index = 0

    def _end_key(self, key):
        pass

    def _write_comma(self):
        """Write a separator before every element of a key except the first."""
        if 0 < self._element_index < self._elements_in_key:
            self._out.write(b',')
        self._element_index += 1

    def set(self, key, value, expiry, info):
        self._start_key(key, 0)
        # Double embedded quotes so the quoted key stays a single CSV field.
        quoted_key = self.encode_key(key).replace(b'"', b'""')
        self._out.write(b'"' + quoted_key + b'","0x' + self.encode_value(value) + b'"')
        self._end_key(key)

    def start_hash(self, key, length, expiry, info):
        self._start_key(key, length)
        self._out.write(self.encode_key(key) + b':{')

    def hset(self, key, field, value):
        self._write_comma()
        self._out.write(self.encode_key(field) + b':' + self.encode_value(value))

    def end_hash(self, key):
        self._end_key(key)
        self._out.write(b'}')

    def start_set(self, key, cardinality, expiry, info):
        self._start_key(key, cardinality)
        self._out.write(self.encode_key(key) + b':[')

    def sadd(self, key, member):
        self._write_comma()
        self._out.write(self.encode_value(member))

    def end_set(self, key):
        self._end_key(key)
        self._out.write(b']')

    def start_list(self, key, expiry, info):
        # No length is supplied for lists; rpush grows the count as it goes.
        self._start_key(key, 0)
        self._out.write(self.encode_key(key) + b':[')

    def rpush(self, key, value):
        self._elements_in_key += 1
        self._write_comma()
        self._out.write(self.encode_value(value))

    def end_list(self, key, info):
        self._end_key(key)
        self._out.write(b']')

    def start_sorted_set(self, key, length, expiry, info):
        self._start_key(key, length)
        self._out.write(self.encode_key(key) + b':{')

    def zadd(self, key, score, member):
        self._write_comma()
        self._out.write(self.encode_key(member) + b':' + self.encode_value(score))

    def end_sorted_set(self, key):
        self._end_key(key)
        self._out.write(b'}')

    def start_stream(self, key, listpacks_count, expiry, info):
        self._start_key(key, 0)
        self._out.write(self.encode_key(key) + b':{')

    def end_stream(self, key, items, last_entry_id, cgroups):
        # Stream contents are not written; only the empty braces appear.
        self._end_key(key)
        self._out.write(b'}')

    def start_module(self, key, module_name, expiry, info):
        self._start_key(key, 0)
        self._out.write(self.encode_key(key) + b':{')
        # NOTE(review): False presumably tells the parser not to deliver the
        # module payload; confirm against RdbCallback.
        return False

    def end_module(self, key, buffer_size, buffer=None):
        self._end_key(key)
        self._out.write(b'}')

class KeysOnlyCallback(RdbCallback):
def __init__(self, out, string_escape=None):
Expand Down
3 changes: 2 additions & 1 deletion rdbtools/cli/rdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import sys
from argparse import ArgumentParser
from rdbtools import RdbParser, JSONCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, KeysOnlyCallback, KeyValsOnlyCallback
from rdbtools import RdbParser, JSONCallback, CSVCallback, DiffCallback, MemoryCallback, ProtocolCallback, PrintAllKeys, KeysOnlyCallback, KeyValsOnlyCallback
from rdbtools.encodehelpers import ESCAPE_CHOICES
from rdbtools.parser import HAS_PYTHON_LZF as PYTHON_LZF_INSTALLED

Expand Down Expand Up @@ -82,6 +82,7 @@ def main():
callback = {
'diff': lambda f: DiffCallback(f, string_escape=options.escape),
'json': lambda f: JSONCallback(f, string_escape=options.escape),
'csv': lambda f: CSVCallback(f, string_escape=options.escape),
'justkeys': lambda f: KeysOnlyCallback(f, string_escape=options.escape),
'justkeyvals': lambda f: KeyValsOnlyCallback(f, string_escape=options.escape),
'memory': lambda f: MemoryCallback(PrintAllKeys(f, options.bytes, options.largest),
Expand Down