Skip to content

Commit

Permalink
Add new output format: csv with cluster info (#18)
Browse files Browse the repository at this point in the history
Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>
  • Loading branch information
kashyapm94 and msramalho authored Sep 27, 2022
1 parent dc7e126 commit e9a7519
Show file tree
Hide file tree
Showing 6 changed files with 745 additions and 471 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ black = "*"
pre-commit = "*"
pytest = "*"
wheel = "*"
geoclustering = {editable = true, path = "."}

[requires]
python_version = "3.9"
1,169 changes: 703 additions & 466 deletions Pipfile.lock

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ id,name,lat,lon

## Output

If at least one cluster was found, the tool outputs a folder with output as `json`, `geojson`, `txt` files. A kepler.gl `html` file is generated as well.
If at least one cluster was found, the tool outputs a folder with output as `json`, `geojson`, `txt`, `csv` files. A kepler.gl `html` file is generated as well.

### JSON

Expand Down Expand Up @@ -132,6 +132,16 @@ id 9, name Rosanna Foggo, lat -6.2074293, lon 106.8915948
// ...
```

### CSV

Encodes each event on its own line, together with the `cluster_id` of the cluster it belongs to.

```csv
cluster_id,name,lat,lon
9,Rosanna Foggo,-6.2074293,106.8915948
...
```

### kepler.gl

![kepler.gl instance](https://user-images.githubusercontent.com/1682504/176478177-c0446b51-4060-495c-803d-79e2bbd3e966.png)
Expand Down
2 changes: 1 addition & 1 deletion geoclustering/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ def print_debug(s):
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")

encoded = encoding.encode_clusters(clusters)

io.write_output_file(output, "result.txt", encoded["string"])
io.write_output_file(output, "result.json", encoded["json"])
io.write_output_file(output, "result.geojson", encoded["geojson"])
io.write_output_file(output, "result.csv", encoded["csv"])

vis = io.write_visualization(output, "result.html", encoded["geojson"])
if vis is None:
Expand Down
30 changes: 28 additions & 2 deletions geoclustering/encoding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import numpy as np
import geojson
import csv
import io  # standard-library io (used for StringIO), not this package's own io module


class NpEncoder(json.JSONEncoder):
Expand Down Expand Up @@ -74,13 +76,36 @@ def get(self):
return json.dumps(geojson.FeatureCollection(self.state), cls=NpEncoder)


class CSVEncoder:
    """Encodes clustering result as a CSV.

    Every record of every visited cluster becomes one CSV row, with an
    additional leading ``cluster_id`` column. The remaining columns are taken
    from the keys of the first record seen. Non-numeric fields (including the
    header names) are quoted, because the writer uses
    ``csv.QUOTE_NONNUMERIC``.
    """

    def __init__(self):
        # In-memory text buffer that accumulates the CSV document.
        self.state = io.StringIO()
        # Created lazily: the column names are only known once the first
        # record has been seen.
        self.writer = None

    def visitor(self, cluster_id, cluster):
        """Append all records of one cluster to the CSV buffer.

        Args:
            cluster_id: Identifier written to the ``cluster_id`` column of
                every row of this cluster.
            cluster: Sequence of dict-like records belonging to the cluster.
                An empty cluster contributes no rows.

        Raises:
            ValueError: Raised by ``csv.DictWriter`` if a record contains a
                key that is not among the columns fixed by the first record.
        """
        if not cluster:
            # Nothing to write, and no record to derive the header from.
            return

        if self.writer is None:
            self.writer = csv.DictWriter(
                self.state,
                fieldnames=["cluster_id"] + list(cluster[0].keys()),
                quoting=csv.QUOTE_NONNUMERIC,
            )
            self.writer.writeheader()

        for record in cluster:
            # "cluster_id" is merged last so it wins over a same-named key
            # in the record.
            self.writer.writerow({**record, "cluster_id": cluster_id})

    def get(self):
        """Return the CSV document accumulated so far as a string."""
        return self.state.getvalue()


def encode_clusters(clusters):
json_encoder = JSONEncoder()
geojson_encoder = GeoJSONEncoder()
string_encoder = StringEncoder()
csv_encoder = CSVEncoder()

encoders = [json_encoder, geojson_encoder, string_encoder]

encoders = [json_encoder, geojson_encoder, string_encoder, csv_encoder]
for cluster_id, cluster in clusters.items():
for encoder in encoders:
encoder.visitor(cluster_id, cluster)
Expand All @@ -89,4 +114,5 @@ def encode_clusters(clusters):
"json": json_encoder.get(),
"geojson": geojson_encoder.get(),
"string": string_encoder.get(),
"csv": csv_encoder.get(),
}
2 changes: 1 addition & 1 deletion tests/fixtures/snapshots/result.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]

0 comments on commit e9a7519

Please sign in to comment.