Skip to content

Commit

Permalink
Merge pull request #25 from CAIDA/py3
Browse files Browse the repository at this point in the history
Update python3 version
  • Loading branch information
digizeph authored May 6, 2021
2 parents 1442d40 + 78c3d8a commit cd253fc
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 24 deletions.
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

setuptools.setup(
name='pywandio',
version='0.2.1',
version='0.3.0',
description='High-level file IO library',
url='https://github.com/CAIDA/pywandio',
author='Alistair King, Chiara Orsini, Mingwei Zhang',
author_email='software@caida.org',
packages=setuptools.find_packages(),
install_requires=[
'python-dotenv',
'python-keystoneclient',
'python-swiftclient',
],
Expand Down
19 changes: 19 additions & 0 deletions tests/test-local-gzip-read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import wandio

def count_lines_and_words(lines):
    """Return ``(line_count, word_count)`` for an iterable of lines.

    Each line is right-stripped and split on whitespace, so the helper works
    for both ``str`` and ``bytes`` lines.

    :param lines: iterable yielding one line per item (e.g. an open file)
    :return: tuple of the number of lines and the total number of words
    """
    line_count = 0
    word_count = 0
    for line in lines:
        word_count += len(line.rstrip().split())
        line_count += 1
    return line_count, word_count


if __name__ == '__main__':

    # First pass: open the gzip test file with the default mode.
    with wandio.open('test.txt.gz') as fh:
        line_count, word_count = count_lines_and_words(fh)
    print(line_count, word_count)

    # Second pass: same file, explicitly requesting binary mode.
    with wandio.open('test.txt.gz', "rb") as fh:
        line_count, word_count = count_lines_and_words(fh)
    print(line_count, word_count)
13 changes: 13 additions & 0 deletions tests/test-local-gzip-write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import wandio

def copy_and_count(src, dst):
    """Copy every line from *src* to *dst* and count lines and words.

    :param src: iterable yielding lines (e.g. an open reader)
    :param dst: object with a ``write(line)`` method (e.g. an open writer)
    :return: tuple ``(line_count, word_count)`` for the lines copied
    """
    line_count = 0
    word_count = 0
    for line in src:
        word_count += len(line.rstrip().split())
        line_count += 1
        dst.write(line)
    return line_count, word_count


if __name__ == '__main__':

    # Read a remote text file over HTTP and write it to a local gzip file,
    # printing the line and word totals once the copy is finished.
    with wandio.open('http://data.caida.org/datasets/as-relationships/README.txt') as fh:
        with wandio.open('test.txt.gz', mode='w') as ofh:
            line_count, word_count = copy_and_count(fh, ofh)
    print(line_count, word_count)
40 changes: 40 additions & 0 deletions tests/test-swift-read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import wandio
from dotenv import load_dotenv, find_dotenv
import os

def count_lines_and_words(lines):
    """Return ``(line_count, word_count)`` for an iterable of lines.

    Each line is right-stripped and split on whitespace, so the helper works
    for both ``str`` and ``bytes`` lines.

    :param lines: iterable yielding one line per item (e.g. an open file)
    :return: tuple of the number of lines and the total number of words
    """
    line_count = 0
    word_count = 0
    for line in lines:
        word_count += len(line.rstrip().split())
        line_count += 1
    return line_count, word_count


if __name__ == '__main__':

    files = [
        # 'swift://datasets-external-netacq-codes/country_codes.csv',
        # 'https://www.caida.org/~mingwei/',
        # 'http://data.caida.org/datasets/as-relationships/README.txt',
        # 'http://loki.caida.org:2243/data/external/as-rank-ribs/19980101/19980101.as-rel.txt.bz2',
        # '/home/mingwei/moas.1601006100.events.gz',
        'swift://bgp-hijacks-edges/year=2020/month=09/day=30/hour=11/edges.1601466300.events.gz',
    ]

    # Load credentials from the ".limbo-cred" dotenv file; override=True lets
    # values from that file replace variables already set in the environment.
    load_dotenv(find_dotenv(".limbo-cred"), override=True)
    options = {
        "auth_version": '3',
        "os_username": os.environ.get('OS_USERNAME'),
        "os_password": os.environ.get('OS_PASSWORD'),
        "os_project_name": os.environ.get('OS_PROJECT_NAME'),
        "os_auth_url": os.environ.get('OS_AUTH_URL'),
    }

    for filename in files:
        try:
            # the with statement automatically closes the file at the end
            # of the block
            with wandio.open(filename, options=options) as fh:
                line_count, word_count = count_lines_and_words(fh)
            # print the number of lines and words in file
            print(filename)
            print(line_count, word_count)
        except IOError:
            # Report which file failed, then re-raise the original error.
            print(filename)
            raise
Binary file added tests/test.txt.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions wandio/compressed.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def flush(self):
self.fh.flush()

def write(self, data):
if isinstance(data, str):
data = data.encode()
cd = self.compressor.compress(data)
# cd is partial compressed data
self.fh.write(cd)
Expand Down
10 changes: 6 additions & 4 deletions wandio/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ def __init__(self):

class SimpleReader(GenericReader):

def __init__(self, filename):
super(SimpleReader, self).__init__(open(filename, "r"))
def __init__(self, filename, mode="r"):
assert mode in ["r", "rb"]
super(SimpleReader, self).__init__(open(filename, mode))


class SimpleWriter(GenericWriter):

def __init__(self, filename):
super(SimpleWriter, self).__init__(open(filename, "w"))
def __init__(self, filename, mode="w"):
assert mode in ["w", "wb"]
super(SimpleWriter, self).__init__(open(filename, mode))
41 changes: 25 additions & 16 deletions wandio/opener.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@

import wandio.compressed
import wandio.file
import wandio.http
import wandio.wand_http
import wandio.swift


class Reader(wandio.file.GenericReader):

def __init__(self, filename, options=None):
def __init__(self, filename, mode="r", options=None):
self.filename = filename

# check for the transport types first (HTTP, Swift, Simple)
Expand All @@ -31,15 +31,18 @@ def __init__(self, filename, options=None):

# is this simple HTTP ?
elif urlparse(self.filename).netloc:
fh = wandio.http.HttpReader(self.filename)
fh = wandio.wand_http.HttpReader(self.filename)

# stdin?
elif filename == "-":
fh = wandio.file.StdinReader()

# then it must be a simple local file
else:
fh = wandio.file.SimpleReader(self.filename)
# default reading as binary mode for .gz and .bz2 files
if filename.endswith(".gz") or filename.endswith(".bz2"):
mode = "rb"
fh = wandio.file.SimpleReader(self.filename, mode=mode)

assert fh

Expand All @@ -63,22 +66,25 @@ def __init__(self, filename, options=None):
# TODO: refactor Reader and Writer
class Writer(wandio.file.GenericWriter):

def __init__(self, filename, options=None):
def __init__(self, filename, mode="w", options=None):
self.filename = filename

# check for the transport types first (HTTP, Swift, Simple)
is_binary_file = True if filename.endswith(".gz") or filename.endswith(".bz2") else False

# check for the transport types first (HTTP, Swift, Simple)
# is this Swift
if filename.startswith("swift://"):
fh = wandio.swift.SwiftWriter(self.filename, options=options)
fh = wandio.swift.SwiftWriter(self.filename, options=options, use_bytes_io=is_binary_file)

# is this simple HTTP ?
elif urlparse(self.filename).netloc:
raise NotImplementedError("Writing to HTTP is not supported")

# then it must be a simple local file
else:
fh = wandio.file.SimpleWriter(self.filename)
if is_binary_file:
mode = "wb"
fh = wandio.file.SimpleWriter(self.filename, mode=mode)

assert fh

Expand All @@ -99,13 +105,13 @@ def __init__(self, filename, options=None):
super(Writer, self).__init__(fh)


def wandio_open(filename, mode="r"):
if mode == "r":
return Reader(filename)
elif mode == "w":
return Writer(filename)
def wandio_open(filename, mode="r", options=None):
if mode in ["r", "rb"]:
return Reader(filename, mode, options)
elif mode in ["w", "wb"]:
return Writer(filename, mode, options)
else:
raise ValueError("Invalid mode. Mode must be either 'r' or 'w'")
raise ValueError("Invalid mode. Mode must be either 'r'/'rb' or 'w'/'wb'")


def wandio_stat(filename):
Expand All @@ -119,7 +125,7 @@ def wandio_stat(filename):

# is this simple HTTP ?
elif urlparse(filename).netloc:
statfunc = wandio.http.http_stat
statfunc = wandio.wand_http.http_stat

# stdin?
elif filename == "-":
Expand Down Expand Up @@ -147,11 +153,14 @@ def read_main():
help="Force use of next (for testing)")

parser.add_argument('files', nargs='+', help='Files to read from')
parser.add_argument('-m', '--file-mode', required=False,
type=str, default='r',
help="Open files using this file mode")

opts = vars(parser.parse_args())

for filename in opts['files']:
with Reader(filename) as fh:
with Reader(filename, mode=opts['file_mode']) as fh:
if opts['use_next']:
# sys.stderr.write("Reading using 'next'\n")
for line in fh:
Expand Down
9 changes: 6 additions & 3 deletions wandio/swift.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,18 +214,21 @@ def close(self):
# TODO: figure out how to stream to swift rather than buffer all in memory
class SwiftWriter(wandio.file.GenericWriter):

def __init__(self, url, options=None):
def __init__(self, url, options=None, use_bytes_io=False):
parsed_url = parse_url(url)
self.container = parsed_url["container"]
self.object = parsed_url["obj"]
self.options = options
self.buffer = io.StringIO()
if use_bytes_io:
self.buffer = io.BytesIO()
else:
self.buffer = io.StringIO()
super(SwiftWriter, self).__init__(self.buffer)

def flush(self):
pass

def close(self):
self.buffer.reset()
self.buffer.seek(0)
upload(self.buffer, container=self.container,
obj=self.object, options=self.options)
File renamed without changes.

0 comments on commit cd253fc

Please sign in to comment.