Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finish test-task-1 #32

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,36 @@
4. Найденный в соответствии с условием задачи месяц должен выводиться на английском языке в нижнем регистре. Если месяцев несколько, то на вывод они все подаются на английском языке в нижнем регистре в порядке их следования в течение года.

## Автор решения

Никифоров Савелий Денисович
## Описание реализации
Реализован базовый функционал прототипируемой системы.

Алгоритм работы основан на импорте предоставленных данных в базу данных sqlite3.
Затем средствами SQL выполняется агрегация данных.

Внутри проекта существует две основные сущности:

`JsonToSqliteLoader` - Импортирует объекты из Json в sqlite3 базу данных
`SqliteFindMaxSpendingMonthStrategy` - Стратегия поиска месяца, в котором была потрачена наибольшая сумма, посредством SQL-запросов.

Агрегирование данных средствами SQL базы данных было выбрано ради будущей масштабируемости (если задача это подразумевает).

> Нейминг с суффиксом "стратегия" был взят из-за замысла дальнейшего масштабирования в прототипируемой системе, если будут альтернативные способы поиска необходимых данных.

Данная реализация не требует дополнительных библиотек. :)

Внимание! Обработка ошибок и пути их обхода здесь не реализованы.

## Инструкция по сборке и запуску решения

Python >= 3.11

```commandline
python3.11 main.py {{json-file}}
```

### Важно: у пользователя, запускающего решение, должны быть права rwxd (read-write-execute-delete), так как в решении создаются временные папки.

Альтернативный путь использования:

- из модуля `main.py` вызвать функцию `find_max_spending_month` с аргументом, который указывает на путь к файлу.
49 changes: 49 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from src.loader import JsonToSqliteLoader
from src.utils import create_database, format_result
from src.strategies import SqliteFindMaxSpendingMonthStrategy
import sqlite3
from tempfile import TemporaryDirectory
from datetime import datetime
from os import PathLike
import argparse


def find_max_spending_month(file: PathLike):
    """
    Print the month(s) with the highest total of completed orders.

    The JSON file is imported into a throw-away sqlite3 database created in
    a temporary directory under the current working directory. The
    aggregation is done with SQL and the formatted result is written to
    stdout.

    :param file: Path to the JSON file with orders
    :return: None, the result is printed
    """
    # Sets fields as it was in example
    datatypes_mapping = {
        "user_id": str,
        "ordered_at": datetime,
        "status": str,
        "total": float,
    }
    temp_db_name = "temp.db"
    table_name = "sells"

    # Create temporary directory for only importing file
    with TemporaryDirectory(suffix="database", dir="./") as temp_dir:
        # Create database in temp dir
        connection = sqlite3.connect(f"{temp_dir}/{temp_db_name}")
        try:
            create_database(
                connection=connection, table_name=table_name, rows=datatypes_mapping
            )
            # Load data from json to sqlite3
            loader = JsonToSqliteLoader(
                file_path=file, table_name=table_name, connection=connection
            )
            loader.load()

            # Finding and formatting result
            strategy = SqliteFindMaxSpendingMonthStrategy(connection, table_name)
            result = format_result(strategy.find())
            print(result)
        finally:
            # Release the database file before the temporary directory is
            # removed; an open handle leaks and can make the cleanup fail
            # (e.g. on Windows, where open files cannot be deleted).
            connection.close()


if __name__ == "__main__":
    # Command-line entry point: one positional argument with the path to
    # the JSON file, handed straight to `find_max_spending_month`.
    cli = argparse.ArgumentParser(
        description="Finds max spending month in given json-file"
    )
    cli.add_argument("file", type=str, help="Data input on sells")
    find_max_spending_month(cli.parse_args().file)
Empty file added src/__init__.py
Empty file.
110 changes: 110 additions & 0 deletions src/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import json
from abc import ABC, abstractmethod
from os import PathLike
import sqlite3


class AbstractLoader(ABC):
"""
Abstract data loader
"""

def __init__(self, file_path: str | PathLike):
self._file_path = file_path

def load(self):
"""
Load data for given resource
"""


class JsonToSqliteLoader(AbstractLoader):
    """
    Import a JSON array of objects into a sqlite3 table.

    Every object of the array becomes one row. `get_data`, `insert_row` and
    `map_to_columns` are separate methods so that a subclass can override
    them (for example to stream a huge file or to insert in chunks).
    """

    def __init__(
        self,
        file_path: str | PathLike,
        table_name: str,
        connection: sqlite3.Connection,
        datatype_mapping=None,
        mapping: dict | None = None,
    ):
        """
        :param file_path: Path to Json file
        :param table_name: Table to insert into
        :param connection: Sqlite3 database connection
        :param datatype_mapping: Not used by this class; accepted so that
            existing callers keep working
        :param mapping: object and database mapping.
            For ex: {"json_field": "database_field"}
        """
        super().__init__(file_path)
        self._connection = connection
        self._mapping = mapping
        self._table_name = table_name

    def load(self):
        """
        Load file to Sqlite database, one `insert_row` call per object
        """
        cursor = self._connection.cursor()
        for obj in self.get_data():
            self.insert_row(cursor=cursor, obj=obj)

    def get_data(self) -> list[dict]:
        """
        Read the whole json file with the built-in json module.
        It can be turned into a generator if the Json file is huge.

        :return: Parsed content of the file (a list of dicts in the
            given example)
        """
        # JSON is UTF-8 by specification; do not depend on the locale default
        with open(self._file_path, encoding="utf-8") as file:
            return json.load(file)

    def insert_row(self, cursor, obj):
        """
        Insert one object as a row and commit.
        It should be overridden to work with chunks.

        :param cursor: cursor of the target database
        :param obj: object to insert
        :return: None
        """
        data = self.map_to_columns(obj)

        # Dicts keep insertion order, so column names and bound values
        # line up position by position.
        columns = ", ".join(data)
        placeholders = ", ".join("?" for _ in data)

        # Values are bound as parameters, so numbers need no conversion and
        # strings need no quoting or escaping. Identifiers (table and
        # column names) cannot be bound and are still interpolated.
        cursor.execute(
            f"INSERT INTO {self._table_name} ({columns}) VALUES ({placeholders});",
            tuple(data.values()),
        )

        self._connection.commit()

    def map_to_columns(self, obj: dict):
        """
        Returns mapped fields data from obj to database

        :param obj: importing obj
        :return: new dictionary with {field_database: obj_value}; without a
            configured mapping it is a copy of `obj`
        """
        if not self._mapping:
            return dict(obj)

        # The mapping is {"json_field": "database_field"}, so iterate pairs
        return {
            field_database: obj.get(field_obj)
            for field_obj, field_database in self._mapping.items()
        }

    def wrap_datatypes(self, obj: dict):
        """
        Wrap str values into single quotes so they form SQL string literals.

        No longer used by `insert_row` (values are bound as parameters);
        kept for code that builds SQL text itself. Embedded single quotes
        are doubled so the literal stays valid.

        :param obj: obj, modified in place
        :return: the same dict
        """
        for key, value in obj.items():
            if isinstance(value, str):
                escaped = value.replace("'", "''")
                obj[key] = f"'{escaped}'"
        return obj
57 changes: 57 additions & 0 deletions src/strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import sqlite3
from abc import ABC, abstractmethod


class AbstractStrategy(ABC):
    """Interface of a search strategy: subclasses have to implement `find`."""

    @abstractmethod
    def find(self):
        """Run the search and return its result."""


class SqliteFindMaxSpendingMonthStrategy(AbstractStrategy):
    """
    Strategy to find max spending month with given sqlite3 database connection

    Only rows with status 'COMPLETED' are taken into account. Rows are
    grouped by the month number of `ordered_at`; every month whose sum of
    `total` equals the largest monthly sum is returned.
    """

    # Largest monthly sum. DESC keeps NULL sums last and LIMIT 1 returns
    # just the top row (a negative LIMIT means "no limit" in SQLite).
    _find_max_total_query = """
        SELECT SUM(total) as total_sum
        FROM {table_name}
        WHERE status = 'COMPLETED'
        GROUP BY strftime('%m',date(ordered_at))
        ORDER BY total_sum DESC
        LIMIT 1
    """

    # Months whose sum equals the bound value (the `?` placeholder)
    _find_month_query = """
        SELECT strftime('%m',date(ordered_at)) as month
        FROM {table_name}
        WHERE status = 'COMPLETED'
        GROUP BY month
        HAVING SUM(total) = ?
        ORDER BY month
    """

    def __init__(self, connection: sqlite3.Connection, table_name: str):
        """
        :param connection: connection to the database that holds the data
        :param table_name: table to aggregate
        """
        self._connection = connection
        self._table_name = table_name

    def find(self) -> list[str]:
        """
        Find the month(s) with the largest sum of completed orders.

        :return: month numbers as produced by strftime('%m') in ascending
            order; an empty list when there is nothing to aggregate
        """
        cursor = self._connection.cursor()
        max_sum = self._get_max_sum(cursor)
        if max_sum is None:
            # No completed orders (or no non-NULL totals): no month wins
            return []
        months = self._get_months(cursor, max_sum)
        return [month[0] for month in months]

    def _get_max_sum(self, cursor) -> float | None:
        """
        Execute query for finding max spending sum

        :return: the largest monthly sum, or None when the query yields no row
        """
        row = cursor.execute(
            self._find_max_total_query.format(table_name=self._table_name)
        ).fetchone()
        return None if row is None else row[0]

    def _get_months(self, cursor, max_sum) -> list[tuple[str]]:
        """Execute query for finding months number by given sum"""
        # The sum is bound as a parameter so the value reaches SQLite
        # unchanged instead of going through a text representation.
        return cursor.execute(
            self._find_month_query.format(table_name=self._table_name),
            (max_sum,),
        ).fetchall()
59 changes: 59 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import json
import sqlite3
from datetime import datetime
import calendar


# Python type -> column type name used when generating CREATE TABLE
datatype_mapping = {
    int: "INTEGER",
    str: "TEXT",
    datetime: "TEXT",
    float: "NUMERIC",
}

# Template of the CREATE TABLE statement; `fields` is the column list
CREATE_QUERY_TEMPLATE = """
CREATE TABLE {table_name} ( {fields} );
"""


def create_database(connection: sqlite3.Connection, table_name: str, rows: dict):
    """
    Create a table whose columns are described by a name -> type mapping.

    :param connection: connection to sqlite3 database
    :param rows: Columns of the table, mapping like {"field_name": datatype};
        each datatype is translated through `datatype_mapping`
    :param table_name: name of the table to create
    :return: None
    """
    column_defs = [
        f"{name} {datatype_mapping.get(py_type)}" for name, py_type in rows.items()
    ]
    db_cursor = connection.cursor()
    statement = CREATE_QUERY_TEMPLATE.format(
        table_name=table_name, fields=", ".join(column_defs)
    )
    db_cursor.execute(statement)
    connection.commit()


def format_months(months: list[str]) -> list[str]:
    """
    Turn month numbers into lower-case month names.

    :param months: month numbers as strings, e.g. "01" or "12"
    :return: names taken from `calendar.month_name`, in the same order
    """
    # lower due to given example in README.MD
    return [calendar.month_name[int(number)].lower() for number in months]


def format_result(months: list[str]) -> str:
    """
    Build the output string in the shape of the example in README.MD.

    :param months: month numbers as strings
    :return: JSON text of an object with a single "months" key holding
        the formatted month names
    """
    return json.dumps({"months": format_months(months)})