Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add CSV serialization #11

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions atlas_core/data_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,13 @@ def process_dataset(dataset):
df = merge_ids_from_codes(
df, field_name, classification_table, field_name + "_id"
)

df[field_name + "_id"] = df[field_name + "_id"].astype(
"category", categories=classification_table.index.values
CategoricalDtype(categories=classification_table.index.values)
)

if "year" in df.columns:
df["year"] = df["year"].astype("category", categories=df.year.unique())
df["year"] = df["year"].astype(CategoricalDtype(categories=df.year.unique()))

# Gather each facet dataset (e.g. DY, PY, DPY variables from DPY dataset)
facet_outputs = {}
Expand Down
47 changes: 46 additions & 1 deletion atlas_core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class MsgpackSerializer(ISerializerStrategy):

def __init__(self):
import msgpack

self.msgpack = msgpack

def serialize(self, *args, **kwargs):
Expand All @@ -92,9 +93,9 @@ def serialize(self, *args, **kwargs):


class UjsonSerializer(ISerializerStrategy):

def __init__(self):
import ujson

self.ujson = ujson

def serialize(self, *args, **kwargs):
Expand All @@ -114,3 +115,47 @@ def serialize(self, *args, **kwargs):
+ "\n",
mimetype="application/json",
)


class CsvSerializer(ISerializerStrategy):
    """Serializer strategy that renders its payload as a ``text/csv`` response."""

    def __init__(self):
        # Dependencies are imported here and kept on the instance, in the
        # same way MsgpackSerializer and UjsonSerializer hold their backends.
        import csv
        from io import StringIO

        import pandas as pd

        self.csv = csv
        self.pd = pd
        self.stringio = StringIO

    def serialize(self, *args, **kwargs):
        """
        Build a CSV response from the given payload.

        The payload is chosen as follows: a single positional argument is
        used as-is; otherwise the ``data`` keyword argument is used when
        present; otherwise the whole ``args`` tuple (or, when that is empty,
        the ``kwargs`` dict) is used.

        Returns
        -------
        response: current_app.response_class
            Response whose body is the CSV text, with mimetype ``text/csv``.

        Raises
        ------
        TypeError
            If both positional and keyword arguments are supplied.
        """
        if args and kwargs:
            raise TypeError("behavior undefined when passed both args and kwargs")

        if len(args) == 1:
            payload = args[0]
        elif "data" in kwargs:
            payload = kwargs["data"]
        else:
            payload = args or kwargs

        csv_text = self.create_file_object(payload).getvalue()
        return current_app.response_class(csv_text, mimetype="text/csv")

    def create_file_object(self, data):
        """
        Convert ``data`` to a pandas DataFrame and write it, without the
        index column, as CSV into an in-memory StringIO buffer. Adapted from
        https://gist.github.com/mangecoeur/1fbd63d4758c2ba0c470#gistcomment-2086007

        Parameters
        ----------
        data: anything accepted by the ``pandas.DataFrame`` constructor

        Returns
        -------
        file_object: StringIO
            Buffer holding the CSV text, rewound to position 0.
        """
        frame = self.pd.DataFrame(data)
        buffer = self.stringio()
        frame.to_csv(buffer, index=False)
        # Rewind so callers that read() the buffer start at the beginning.
        buffer.seek(0)
        return buffer