diff --git a/.gitignore b/.gitignore
index 518ccac..e062a14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -226,5 +226,5 @@
 backup/
 mysql/backup.sql
 mysql/test.sql
-
+.venv/
 __pycache__/
diff --git a/docker-compose.yml b/docker-compose.yml
index 9e4b945..a3ea899 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -19,6 +19,7 @@ services:
     depends_on:
       vol_mysql:
         condition: service_healthy
+
   vol_mysql:
     container_name: ${MYSQL_CONTAINER_NAME}
     build: ./mysql
@@ -38,6 +39,7 @@ services:
       interval: 10s
       timeout: 20s
       retries: 20
+
   vol_python:
     container_name: vol_python
     build: ./python
@@ -45,12 +47,38 @@ services:
       - ./python/app:/app
     env_file:
       - .env
-    command: 
+    command:
       - "python3"
      - "main.py"
     depends_on:
       vol_mysql:
-        condition: service_healthy 
+        condition: service_healthy
+
+  vol_probability_api:
+    container_name: vol_probability_api
+    build:
+      context: .
+      dockerfile: ./probability/api/dockerfile
+    env_file:
+      - ./probability/.env
+    ports:
+      - "${PROBABILITY_PORT}:8090"
+    expose:
+      - 8090
+    depends_on:
+      - vol_mysql
+
+  vol_probability_clustering:
+    container_name: vol_probability_clustering
+    tty: true
+    build:
+      context: .
+      dockerfile: ./probability/clustering/dockerfile
+    env_file:
+      - ./probability/.env
+    depends_on:
+      - vol_mysql
+
 networks:
   default:
     external:
diff --git a/probability/api/__init__.py b/probability/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/probability/api/controller/__init__.py b/probability/api/controller/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/probability/api/controller/probability.py b/probability/api/controller/probability.py
new file mode 100644
index 0000000..b9582cf
--- /dev/null
+++ b/probability/api/controller/probability.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+import math
+from typing import List
+from pydantic import BaseModel
+from fastapi import APIRouter, Depends
+from fastapi.encoders import jsonable_encoder
+from fastapi.responses import ORJSONResponse
+from sqlalchemy.orm import Session
+from datetime import datetime, timedelta
+from models import cluster as cl, user as us, logs as lg
+from service import normal_distribution as nd
+from lib.mysql import get_db
+
+# Response model
+class ProbabilityResponse(BaseModel):
+    userId: int
+    userName: str
+    probability: float
+
+router = APIRouter()
+
+# The current time is assumed to be passed in from the frontend (to be changed later)
+# Returns the probability that a given user enters/leaves the room by a given time today, or at/after that time
+# Parameters: user_id, true or false
+@router.get("/app/probability/{reporting}/{before}" , response_class=ORJSONResponse, response_model=ProbabilityResponse)
+async def get_probability_reporting_before(reporting:str, before:str, user_id:int = 0, date:str = '2024-1-1', time:str = "24:00:00", db: Session = Depends(get_db)):
+    r = True if reporting == "reporting" else False
+    b = True if before == "before" else False
+    date_object= datetime.strptime(date, '%Y-%m-%d') # today's date
+    seven_days_ago= date_object - timedelta(days=7)
+    clusters = cl.get_all_cluster_by_userId_and_date(db, user_id, seven_days_ago, r)
+    delta = abs(clusters[0].date - lg.get_oldest_log_by_userId(db, user_id).date + timedelta(days=1))
+    # Convert the difference to weeks
+    days_difference = math.floor(delta.days/7)
+    # Compute the probability from the clustering results (before the time if b is True, after it if False)
+    pr = nd.probability_from_normal_distribution(clusters, time, days_difference, b)
+    result = ProbabilityResponse(userId=user_id, userName=us.get_user_by_id(db, user_id).name, probability=pr)
+    result_json = jsonable_encoder(result)
+    return ORJSONResponse(result_json)
+
+# Returns, for every user in a community, the probability of entering the room on the given day
+@router.get("/app/probability/{community}/all", response_class=ORJSONResponse, response_model=List[ProbabilityResponse]) +async def get_probability_all(community:int, date:str = "2024-1-1", db: Session = Depends(get_db)): + date_object= datetime.strptime(date, '%Y-%m-%d') + seven_days_ago= date_object - timedelta(days=7) + users = us.get_all_users_by_community(community,db) + # 結果格納用のリスト + result: list[ProbabilityResponse] = [] + # ユーザーごとに繰り返す + for user in users: + clusters = cl.get_all_cluster_by_userId_and_date(db, user.id, seven_days_ago, True) + delta = abs(clusters[0].date - cl.get_oldest_cluster_by_userId(db, user.id, True).date + timedelta(days=1)) + # 差分を日単位に変換 + days_difference = math.floor(delta.days/7) + # ここでクラスタリングの結果を元に確率を計算する(bがTrueなら以前, Falseなら以降) + pr = nd.probability_from_normal_distribution(clusters, "24:00:00", days_difference, True) + result.append(ProbabilityResponse(userId=user.id, userName=user.name, probability=pr)) + # resultをjsonに変換 + result_json = jsonable_encoder(result) + return ORJSONResponse(result_json) \ No newline at end of file diff --git a/probability/api/controller/root.py b/probability/api/controller/root.py new file mode 100644 index 0000000..6b7468c --- /dev/null +++ b/probability/api/controller/root.py @@ -0,0 +1,7 @@ +from fastapi import APIRouter + +router = APIRouter() + +@router.get("/") +def Hello(): + return {"Hello": "World"} \ No newline at end of file diff --git a/probability/api/dockerfile b/probability/api/dockerfile new file mode 100644 index 0000000..f47cd6b --- /dev/null +++ b/probability/api/dockerfile @@ -0,0 +1,18 @@ +FROM python:3.8.18 + +WORKDIR /usr/src/probability + +RUN apt update && apt install -y \ + libpq-dev \ + gcc \ + curl \ + git + +COPY ./probability/api/ ./ + +RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +EXPOSE 8090 + +CMD ["python", "main.py"] \ No newline at end of file diff --git a/probability/api/lib/mysql.py b/probability/api/lib/mysql.py new file mode 100644 index 0000000..173efc6 --- /dev/null +++ b/probability/api/lib/mysql.py @@ -0,0 +1,22 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +import os + +# 接続したいDBへの接続情報 +user_name = os.environ['USER_NAME'] +password = os.environ['PASSWORD'] +host = os.environ['HOST'] +port = os.environ['PORT'] +database = os.environ['DATABASE'] + +SQLALCHEMY_DATABASE_URL = "mysql://" + user_name + ":" + password + "@" + host + ":" +port + "/" + database + "?charset=utf8&unix_socket=/var/run/mysqld/mysqld.sock" + +engine = create_engine(SQLALCHEMY_DATABASE_URL, echo=True) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +def get_db() : + db = SessionLocal() + try: + yield db + finally: + db.close() \ No newline at end of file diff --git a/probability/api/main.py b/probability/api/main.py new file mode 100644 index 0000000..aa05975 --- /dev/null +++ b/probability/api/main.py @@ -0,0 +1,11 @@ +from fastapi import FastAPI +from controller.root import router as root_router +from controller.probability import router as probability_router + +app = FastAPI() +app.include_router(root_router) +app.include_router(probability_router) + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8090) \ No newline at end of file diff --git a/probability/api/models/cluster.py b/probability/api/models/cluster.py new file mode 100644 index 0000000..7c61c01 --- /dev/null +++ b/probability/api/models/cluster.py @@ -0,0 +1,22 @@ +from __future__ import annotations +from sqlalchemy 
+from sqlalchemy.orm import Session
+from . import struct as st
+
+# Get the most recent cluster for a userId
+def get_latest_cluster_by_userId(db: Session, userId, reporting:bool) -> st.Cluster | None:
+    # Fetch the cluster
+    cluster: st.Cluster | None = db.scalar(select(st.Cluster).where(st.Cluster.user_id == userId, st.Cluster.reporting == reporting).order_by(st.Cluster.date.desc()))
+    return cluster
+
+# Get the oldest cluster for a userId
+def get_oldest_cluster_by_userId(db: Session, userId, reporting:bool) -> st.Cluster | None:
+    # Fetch the cluster
+    cluster: st.Cluster | None = db.scalar(select(st.Cluster).where(st.Cluster.user_id == userId, st.Cluster.reporting == reporting).order_by(st.Cluster.date))
+    return cluster
+
+# Get all clusters with the same user_id and date as the one retrieved
+def get_all_cluster_by_userId_and_date(db: Session, userId, date, reporting:bool) -> list[st.Cluster]:
+    # Fetch the clusters
+    clusters: list[st.Cluster] = db.query(st.Cluster).where(st.Cluster.user_id == userId, st.Cluster.reporting == reporting, st.Cluster.date == date).all()
+    return clusters
\ No newline at end of file
diff --git a/probability/api/models/logs.py b/probability/api/models/logs.py
new file mode 100644
index 0000000..1d13cd4
--- /dev/null
+++ b/probability/api/models/logs.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+from . import struct as st
+
+def get_oldest_log_by_userId(db: Session, userId) -> st.Logs:
+    # Fetch the oldest log (ordered by date)
+    log: st.Logs = db.scalar(select(st.Logs).where(st.Logs.user_id == userId).order_by(st.Logs.date))
+    return log
\ No newline at end of file
diff --git a/probability/api/models/struct.py b/probability/api/models/struct.py
new file mode 100644
index 0000000..c4e40e9
--- /dev/null
+++ b/probability/api/models/struct.py
@@ -0,0 +1,53 @@
+from sqlalchemy.orm import (
+    DeclarativeBase,
+    Mapped,
+    mapped_column
+)
+# Import the datetime module
+import datetime
+
+# Define the SQLAlchemy models
+# Create the Base class
+class Base(DeclarativeBase):
+    pass
+
+# Models inheriting from Base
+# # Users model for the users table
+# class Users(Base):
+#     __tablename__ = 'users'
+#     user_id = mapped_column(Integer, primary_key=True, autoincrement=True)
+#     uid = mapped_column(String(255), nullable=False)
+#     name = mapped_column(String(255), nullable=False)
+#     email = mapped_column(String(255), nullable=False)
+#     role = mapped_column(String(255), nullable=False)
+# Logs model for the (provisional) edited_logs table
+class Logs(Base):
+    __tablename__ = 'edited_logs'
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    user_id: Mapped[int] = mapped_column(nullable=False)
+    date:Mapped[datetime.date] = mapped_column(nullable=False)
+    reporting:Mapped[datetime.time] = mapped_column(nullable=False)
+    leave:Mapped[datetime.time] = mapped_column(nullable=False)
+# Cluster model for the clusters table
+class Cluster(Base):
+    __tablename__ = 'clusters'
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    date: Mapped[datetime.date] = mapped_column(nullable=False)
+    reporting: Mapped[bool] = mapped_column(nullable=False)
+    average: Mapped[float] = mapped_column(nullable=False)
+    sd: Mapped[float] = mapped_column(nullable=False)
+    count: Mapped[int] = mapped_column(nullable=False)
+    user_id: Mapped[int] = mapped_column(nullable=False)
+
+class User(Base):
+    __tablename__ = 'users'
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    created_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    updated_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    deleted_at: Mapped[datetime.datetime] = mapped_column(nullable=True)
+    uuid: Mapped[str] = mapped_column(nullable=False)
+    name: Mapped[str] = mapped_column(nullable=False)
+    email: Mapped[str] = mapped_column(nullable=False)
+    role: Mapped[int] = mapped_column(nullable=False)
+    beacon_id: Mapped[int] = mapped_column(nullable=False)
+    community_id: Mapped[int] = mapped_column(nullable=False)
\ No newline at end of file
diff --git a/probability/api/models/user.py b/probability/api/models/user.py
new file mode 100644
index 0000000..17e5170
--- /dev/null
+++ b/probability/api/models/user.py
@@ -0,0 +1,14 @@
+from __future__ import annotations
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+from . import struct as st
+
+# Get all users in a community
+def get_all_users_by_community(community:int, db: Session) -> list[st.User]:
+    users: list[st.User] = db.query(st.User).where(st.User.community_id == community).all()
+    return users
+
+# Get a single user
+def get_user_by_id(db: Session, user_id: int) -> st.User | None:
+    user: st.User | None = db.scalar(select(st.User).where(st.User.id == user_id))
+    return user
\ No newline at end of file
diff --git a/probability/api/requirements.txt b/probability/api/requirements.txt
new file mode 100644
index 0000000..0dc2f24
--- /dev/null
+++ b/probability/api/requirements.txt
@@ -0,0 +1,26 @@
+annotated-types==0.6.0
+anyio==4.2.0
+click==8.1.7
+exceptiongroup==1.2.0
+fastapi==0.109.2
+h11==0.14.0
+idna==3.6
+mypy==1.9.0
+mypy-extensions==1.0.0
+mysqlclient==2.2.4
+numpy==1.24.4
+orjson==3.10.1
+pandas==2.0.3
+pydantic==2.6.0
+pydantic_core==2.16.1
+python-dateutil==2.9.0.post0
+pytz==2024.1
+scipy==1.10.1
+six==1.16.0
+sniffio==1.3.0
+SQLAlchemy @ git+https://github.com/sqlalchemy/sqlalchemy.git@a124a593c86325389a92903d2b61f40c34f6d6e2
+starlette==0.36.3
+tomli==2.0.1
+typing_extensions==4.9.0
+tzdata==2024.1
+uvicorn==0.27.0.post1
\ No newline at end of file
diff --git a/probability/api/service/date.py b/probability/api/service/date.py
new file mode 100644
index 0000000..a27a732
--- /dev/null
+++ b/probability/api/service/date.py
@@ -0,0 +1,35 @@
+from datetime import timedelta
+
+def convert_seconds_to_hms(seconds):
+    hours, remainder = divmod(seconds, 3600)
+    minutes, seconds = divmod(remainder, 60)
+    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
+
+def number_days(start_date, end_date, weekday):
+    # The start and end dates are expected to already be date objects
+    # Initialise the counter
+    day_count = 0
+    # Walk from the start date to the end date one day at a time, checking the weekday
+    current_date = start_date
+    while current_date <= end_date:
+        # Increment the counter when the weekday matches
+        if current_date.weekday() == weekday:
+            day_count += 1
+        # Move on to the next day
+        current_date += timedelta(days=1)
+    return day_count
+
+# def first_day_of_month(date):
+#     # Convert the given date to a datetime object
+#     date_obj = datetime.strptime(date.values[0][0], "%Y-%m-%d")
+#     # Get the first day of the month
+#     first_date = date_obj.replace(day=1)
+#     return first_date
+
+# def last_day_of_month(date):
+#     # Convert the given date to a datetime object
+#     date_obj = datetime.strptime(date.values[0][0], "%Y-%m-%d")
+#     # Get the last day of the month
+#     next_month = date_obj.replace(day=28) + timedelta(days=4)
+#     last_date = next_month - timedelta(days=next_month.day)
+#     return last_date
\ No newline at end of file
diff --git a/probability/api/service/normal_distribution.py b/probability/api/service/normal_distribution.py
new file mode 100644
index 0000000..3f58112
--- /dev/null
+++ b/probability/api/service/normal_distribution.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+from scipy.stats import norm
+from models import struct as st
+
+def convert_to_seconds(time_str :str) -> int:
+    hours, minutes, seconds = map(int, time_str.split(':'))
+    return hours * 3600 + minutes * 60 + seconds
+
+# Compute the probability of arriving before/after `time`
+def probability_from_normal_distribution(clusters: list[st.Cluster], time: str, day_count: int, yn: bool) -> float:
+    probabilities: list[float] = []
+    for cluster in clusters:
+        # Convert the time value to seconds
+        time_seconds = convert_to_seconds(time)
+        if yn:
+            # Proportion of arrivals at the lab before `time`
+            if cluster.sd == 0:
+                if time_seconds >= cluster.average:
+                    n = 1 * cluster.count/day_count
+                else:
+                    n = 0
+            else:
+                n = norm.cdf(time_seconds, cluster.average, cluster.sd)* cluster.count/day_count
+            probabilities.append(n)
+        else:
+            # Proportion of arrivals at the lab after `time`
+            if cluster.sd == 0:
+                if time_seconds <= cluster.average:
+                    n = 1 * cluster.count/day_count
+                else:
+                    n = 0
+            else:
+                n = (1 - norm.cdf(time_seconds, cluster.average, cluster.sd))* cluster.count/day_count
+            probabilities.append(n)
+    probability = sum(probabilities)
+    return round(probability * 100, 2)
+    # if yn:
+    #     print(f"The probability of being at the lab by {time} is {probability*100:.2f}%.")
+    # else:
+    #     print(f"The probability of arriving at the lab after {time} is {probability*100:.2f}%.")
\ No newline at end of file
diff --git a/probability/clustering/README.md b/probability/clustering/README.md
new file mode 100644
index 0000000..b3f2980
--- /dev/null
+++ b/probability/clustering/README.md
@@ -0,0 +1,26 @@
+# Setting up cron when the Docker container starts
+
+## 1. Open crontab in edit mode
+
+```bash
+crontab -e
+```
+
+## 2. Add the following entries
+
+```text
+0 9 * * 1 date >> /usr/clustering/server.log
+0 9 * * 1 /usr/local/bin/cron.sh
+```
+
+## 3. Start cron
+
+```bash
+service cron start
+```
+
+## 4. Check that cron is running
+
+```bash
+service cron status
+```
diff --git a/probability/clustering/cron-clustering b/probability/clustering/cron-clustering
new file mode 100644
index 0000000..39241bb
--- /dev/null
+++ b/probability/clustering/cron-clustering
@@ -0,0 +1,2 @@
+*/1 * * * * date >> /usr/clustering/server.log
+*/1 * * * * /usr/local/bin/cron.sh
\ No newline at end of file
diff --git a/probability/clustering/cron.sh b/probability/clustering/cron.sh
new file mode 100644
index 0000000..3ff727f
--- /dev/null
+++ b/probability/clustering/cron.sh
@@ -0,0 +1,2 @@
+. /root/env.sh
+python3 /usr/clustering/src/main.py >> /usr/clustering/server.log 2>&1
\ No newline at end of file
diff --git a/probability/clustering/dockerfile b/probability/clustering/dockerfile
new file mode 100644
index 0000000..70f81a4
--- /dev/null
+++ b/probability/clustering/dockerfile
@@ -0,0 +1,28 @@
+FROM --platform=linux/arm64 python:3.8.18
+
+WORKDIR /usr/clustering
+
+RUN apt update && apt install -y \
+    libpq-dev \
+    gcc \
+    curl \
+    git \
+    vim \
+    cron
+
+RUN mkdir src
+COPY ./probability/clustering/src/ ./src
+COPY ./probability/clustering/requirements.txt ./
+
+RUN pip install --upgrade pip && \
+    pip install -r requirements.txt
+
+
+COPY ./probability/clustering/cron.sh /usr/local/bin/
+# RUN echo "*/1 * * * * date >> /usr/clustering/server.log" >> /etc/cron.d/cron-clustering
+# RUN echo "*/1 * * * * /usr/local/bin/cron.sh" >> /etc/cron.d/cron-clustering
+# RUN chmod 0644 /etc/cron.d/cron-clustering
+RUN printenv | awk '{print "export " $1}' > /root/env.sh
+RUN chmod +x /usr/local/bin/cron.sh /root/env.sh
+
+# CMD ["cron", "-f"]
\ No newline at end of file
diff --git a/probability/clustering/entrypoint.sh b/probability/clustering/entrypoint.sh
new file mode 100644
index 0000000..544562d
--- /dev/null
+++ b/probability/clustering/entrypoint.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+printenv | awk '{print "export " $1}' > /root/env.sh
+/usr/sbin/cron -f
\ No newline at end of file
diff --git a/probability/clustering/requirements.txt b/probability/clustering/requirements.txt
new file mode 100644
index 0000000..0dc2f24
--- /dev/null
+++ b/probability/clustering/requirements.txt
@@ -0,0 +1,26 @@
+annotated-types==0.6.0
+anyio==4.2.0
+click==8.1.7
+exceptiongroup==1.2.0
+fastapi==0.109.2
+h11==0.14.0
+idna==3.6
+mypy==1.9.0
+mypy-extensions==1.0.0
+mysqlclient==2.2.4
+numpy==1.24.4
+orjson==3.10.1
+pandas==2.0.3
+pydantic==2.6.0
+pydantic_core==2.16.1
+python-dateutil==2.9.0.post0
+pytz==2024.1
+scipy==1.10.1
+six==1.16.0
+sniffio==1.3.0
+SQLAlchemy @ git+https://github.com/sqlalchemy/sqlalchemy.git@a124a593c86325389a92903d2b61f40c34f6d6e2
+starlette==0.36.3
+tomli==2.0.1
+typing_extensions==4.9.0
+tzdata==2024.1
+uvicorn==0.27.0.post1
\ No newline at end of file
diff --git a/probability/clustering/src/lib/mysql.py b/probability/clustering/src/lib/mysql.py
new file mode 100644
index 0000000..aa4eb1c
--- /dev/null
+++ b/probability/clustering/src/lib/mysql.py
@@ -0,0 +1,23 @@
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+import os
+
+# Connection settings for the target DB
+user_name = os.getenv('USER_NAME')
+password = os.getenv('PASSWORD')
+host = os.getenv('HOST')
+port = os.getenv('PORT')
+database = os.getenv('DATABASE')
+
+SQLALCHEMY_DATABASE_URL = "mysql://" + user_name + ":" + password + "@" + host + ":" + port + "/" + database + "?charset=utf8&unix_socket=/var/run/mysqld/mysqld.sock"
+print(SQLALCHEMY_DATABASE_URL)
+
+engine = create_engine(SQLALCHEMY_DATABASE_URL)
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+def get_db() :
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
\ No newline at end of file
diff --git a/probability/clustering/src/main.py b/probability/clustering/src/main.py
new file mode 100644
index 0000000..2293638
--- /dev/null
+++ b/probability/clustering/src/main.py
@@ -0,0 +1,73 @@
+import numpy as np
+from datetime import date, timedelta
+from datetime import datetime
+from service import clustering, date_operation as do
+from models import log, users, editedlog, cluster
+from lib.mysql import get_db
+
+def time_to_seconds(point):
+    try:
+        # Convert hours, minutes and seconds into a total number of seconds
+        return sum(x * float(t) for x, t in zip([3600, 60, 1], str(point).split(":")))
+    except (ValueError, AttributeError):
+        # Skip invalid input (e.g. None, empty strings, malformed values)
+        return None
+
+def main():
+    db = get_db().__next__()
+    # Fetch the list of users
+    user_list = users.get_all_users(db)
+    now_date = date.today()
+    # 1. Iterate over the users
+    for user in user_list:
+        user_id = user.id
+        # Fetch the logs, reshape them and store them in the DB
+        # Fetch the logs from the DB
+        df = log.get_log_by_userId_and_period(db, user_id, now_date - timedelta(days=7), now_date - timedelta(days=1))
+        # Reshape the logs
+        df = do.formatting_log(df)
+        # Store them in the DB
+        for i in range(len(df)):
+            editedlog.add_edited_log(db, user_id, df['date'][i], df['entry'][i], df['exit'][i])
+        # Cluster the arrival and departure times for each weekday from the edited logs
+        # Repeat 7 times, Monday through Sunday
+        for i in range(7):
+            result_entry: list[list[int]] = []
+            result_exit: list[list[int]] = []
+            # 1. Get the DataFrame for this weekday
+            df_day = editedlog.get_edited_logs_by_userId_and_day(db ,user_id, i)
+            # 2. Get the entry and exit times as lists
+            df_entry: list[datetime] = df_day['reporting'].to_list()
+            df_exit: list[datetime] = df_day['leaving'].to_list()
+            # Convert the time values to seconds
+            data_seconds_entry: list[int] = [sum(x * int(t) for x, t in zip([3600, 60, 1], point.strftime('%H:%M:%S').split(":"))) for point in df_entry]
+            data_seconds_exit = np.array([
+                time_to_seconds(point)
+                for point in df_exit
+                if time_to_seconds(point) is not None  # filter out invalid input
+            ])
+            # Run the clustering
+            # and store the results in lists
+            result_entry = clustering.xmeans(data_seconds_entry)
+            result_exit = clustering.xmeans(data_seconds_exit)
+            # 5. Iterate over the clusters
+            day = date.today() - timedelta(days=7) + timedelta(days=i)
+            for j in range(len(result_entry)):
+                # Entry
+                # 1. Compute the cluster mean, standard deviation and number of member points
+                mean_entry = np.mean(result_entry[j])
+                std_entry = np.std(result_entry[j])
+                count_entry = len(result_entry[j])
+                # 2. Store it in the DB
+                cluster.add_cluster(db, user_id, day, True, mean_entry, std_entry, count_entry)
+            for j in range(len(result_exit)):
+                # Exit
+                # 1. Compute the cluster mean, standard deviation and number of member points
+                mean_exit = np.mean(result_exit[j])
+                std_exit = np.std(result_exit[j])
+                count_exit = len(result_exit[j])
+                # 2. Store it in the DB
+                cluster.add_cluster(db, user_id, day, False, mean_exit, std_exit, count_exit)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/probability/clustering/src/models/cluster.py b/probability/clustering/src/models/cluster.py
new file mode 100644
index 0000000..598ea82
--- /dev/null
+++ b/probability/clustering/src/models/cluster.py
@@ -0,0 +1,12 @@
+from sqlalchemy.orm import Session
+import datetime
+from . import struct as st
+
+# Store a cluster in the DB
+def add_cluster(db: Session, uid: int, day: datetime.date, reporting: bool, mean: float, std: float, count: int) -> st.Cluster:
+    # Add the cluster
+    cluster:st.Cluster = st.Cluster(user_id=uid, date=day, reporting=reporting, average=mean, sd=std, count=count)
+    db.add(cluster)
+    db.commit()
+    db.refresh(cluster)
+    return cluster
\ No newline at end of file
diff --git a/probability/clustering/src/models/editedlog.py b/probability/clustering/src/models/editedlog.py
new file mode 100644
index 0000000..6f709ce
--- /dev/null
+++ b/probability/clustering/src/models/editedlog.py
@@ -0,0 +1,23 @@
+import pandas as pd
+from sqlalchemy.orm import Session
+from . import struct as st
+
+# Get the EditedLogs for a given weekday for a userId, returned as a DataFrame
+def get_edited_logs_by_userId_and_day(db: Session, userId: int, day: int) -> pd.DataFrame:
+    # Fetch the edited_logs
+    q = db.query(st.EditedLog).filter(st.EditedLog.user_id == userId)
+    edited_logs = pd.read_sql(q.statement, db.bind)
+    # Keep only the rows for the specified weekday
+    date = pd.to_datetime(edited_logs['date'])
+    edited_logs['day'] = date.dt.weekday
+    edited_logs = edited_logs[edited_logs['day'] == day]
+    return edited_logs
+
+# Add an EditedLog
+def add_edited_log(db: Session, uid: int, date: str, reporting_time: str, leave_time: str) -> st.EditedLog:
+    # Add the edited_log
+    edited_log:st.EditedLog = st.EditedLog(user_id=uid, date=date, reporting=reporting_time, leaving=leave_time)
+    db.add(edited_log)
+    db.commit()
+    db.refresh(edited_log)
+    return edited_log
\ No newline at end of file
diff --git a/probability/clustering/src/models/log.py b/probability/clustering/src/models/log.py
new file mode 100644
index 0000000..19b1cbe
--- /dev/null
+++ b/probability/clustering/src/models/log.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+import pandas as pd
+from datetime import date
+from sqlalchemy.orm import Session
+from . import struct as st
+
+# Get the logs for a userId over a given period, returned as a DataFrame
+def get_log_by_userId_and_period(db: Session, userId: int, start: date, end: date) -> pd.DataFrame:
+    # Fetch the logs
+    q = db.query(st.Log.start_at, st.Log.end_at, st.Log.user_id).filter(st.Log.user_id == userId, st.Log.start_at >= start, st.Log.start_at<= end)
+    logs = pd.read_sql(q.statement, db.bind)
+    return logs
+
+# Get all logs for a userId, returned as a DataFrame
+def get_all_logs_by_userId(db: Session, userId: int) -> pd.DataFrame:
+    # Fetch the logs
+    q = db.query(st.Log.start_at, st.Log.end_at, st.Log.user_id).filter(st.Log.user_id == userId)
+    logs = pd.read_sql(q.statement, db.bind)
+    return logs
\ No newline at end of file
diff --git a/probability/clustering/src/models/struct.py b/probability/clustering/src/models/struct.py
new file mode 100644
index 0000000..015dab4
--- /dev/null
+++ b/probability/clustering/src/models/struct.py
@@ -0,0 +1,64 @@
+# Import the types used from SQLAlchemy
+from sqlalchemy.orm import (
+    DeclarativeBase,
+    Mapped,
+    mapped_column
+)
+import datetime
+
+# Create the Base class
+class Base(DeclarativeBase):
+    pass
+
+# Models inheriting from Base
+# # Users model for the users table
+# class Users(Base):
+#     __tablename__ = 'users'
+#     user_id = mapped_column(Integer, primary_key=True, autoincrement=True)
+#     uid = mapped_column(String(255), nullable=False)
+#     name = mapped_column(String(255), nullable=False)
+#     email = mapped_column(String(255), nullable=False)
+#     role = mapped_column(String(255), nullable=False)
+# EditedLog model for the (provisional) edited_logs table
+class EditedLog(Base):
+    __tablename__ = 'edited_logs'
+    id: Mapped[int] = mapped_column(primary_key=True, index=True)
+    user_id: Mapped[int] = mapped_column(nullable=False)
+    date: Mapped[datetime.date] = mapped_column(nullable=False)
+    reporting: Mapped[datetime.time] = mapped_column(nullable=False)
+    leaving: Mapped[datetime.time] = mapped_column(nullable=False)
+# Cluster model for the clusters table
+class Cluster(Base):
+    __tablename__ = 'clusters'
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    date: Mapped[datetime.date] = mapped_column(nullable=False)
+    reporting: Mapped[bool] = mapped_column(nullable=False)
+    average: Mapped[float] = mapped_column(nullable=False)
+    sd: Mapped[float] = mapped_column(nullable=False)
+    count: Mapped[int] = mapped_column(nullable=False)
+    user_id: Mapped[int] = mapped_column(nullable=False)
+
+class Log(Base):
+    __tablename__ = 'logs'
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    created_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    updated_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    deleted_at: Mapped[datetime.datetime] = mapped_column(nullable=True)
+    room_id: Mapped[int] = mapped_column(nullable=False)
+    start_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    end_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    user_id: Mapped[int] = mapped_column(nullable=False)
+    rssi: Mapped[int] = mapped_column(nullable=False)
+
+class User(Base):
+    __tablename__ = 'users'
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    created_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    updated_at: Mapped[datetime.datetime] = mapped_column(nullable=False)
+    deleted_at: Mapped[datetime.datetime] = mapped_column(nullable=True)
+    uuid: Mapped[str] = mapped_column(nullable=False)
+    name: Mapped[str] = mapped_column(nullable=False)
+    email: Mapped[str] = mapped_column(nullable=False)
+    role: Mapped[int] = mapped_column(nullable=False)
+    beacon_id: Mapped[int] = mapped_column(nullable=False)
+    community_id: Mapped[int] = mapped_column(nullable=False)
\ No newline at end of file
diff --git a/probability/clustering/src/models/users.py b/probability/clustering/src/models/users.py
new file mode 100644
index 0000000..274eedc
--- /dev/null
+++ b/probability/clustering/src/models/users.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+from sqlalchemy.orm import Session
+from sqlalchemy import select
+from . import struct as st
+
+# Get all users
+def get_all_users(db: Session) -> list[st.User]:
+    # Fetch the users
+    users = db.scalars(select(st.User)).all()
+    return users
\ No newline at end of file
diff --git a/probability/clustering/src/service/__init__.py b/probability/clustering/src/service/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/probability/clustering/src/service/clustering.py b/probability/clustering/src/service/clustering.py
new file mode 100644
index 0000000..56dcf1a
--- /dev/null
+++ b/probability/clustering/src/service/clustering.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+import numpy as np
+
+def k_means_pp(data: list[int], k: int) -> list[list[int]]:
+    # Pick the first centroid at random
+    centroids: list[int] = [data[np.random.choice(len(data))]]
+    while len(centroids) < k:
+        # For each data point, compute the squared distance to its nearest centroid
+        distances = [min([np.linalg.norm(point - np.array(centroid)) for centroid in centroids]) ** 2 for point in data]
+        # Pick the next centroid with probability proportional to that squared distance
+        new_centroid_index = np.random.choice(len(data), p=distances / np.sum(distances))
+        centroids.append(data[new_centroid_index])
+    return k_means_clustering(data, centroids)
+
+def k_means_clustering(data: list[int], centroids: list[int]) -> list[list[int]]:
+    while True:
+        # Step 2: group the data points by their nearest centroid
+        groups: list[list[int]] = [[] for _ in range(len(centroids))]
+        for point in data:
+            distances = [np.linalg.norm(point - np.array(centroid)) for centroid in centroids]
+            closest_centroid_index = np.argmin(distances)
+            groups[closest_centroid_index].append(point)
+        # Step 3: take the mean of each group as its new centroid
+        new_centroids = [np.mean(group, axis=0) for group in groups]
+        # Step 5: stop once the new centroids match the previous ones
+        if np.array_equal(centroids, new_centroids):
+            break
+        centroids = new_centroids
+    # Step 6: return the clustering result
+    clusters: list[list[int]] = groups
+    return clusters
+
+def xmeans(data: list[int]) -> list[list[int]]:
+    # Initial number of clusters
+    k = 1
+    while True:
+        # Cluster with k-means
+        clusters = k_means_pp(data, k)
+        # for i, cluster in enumerate(clusters):
+        #     centroid_time = cluster["centroid"]
+        #     cluster_points = cluster["points"]
+        #     print(f"Cluster {i + 1}: Centroid = {centroid_time}, Points = {cluster_points}")
+        # Convert each cluster's data points to seconds
+        # data_seconds = [[sum(x * int(t) for x, t in zip([3600, 60, 1], point.split(":"))) for point in cluster['points']] for cluster in clusters]
+        # print(data_seconds)
+        # Compute the variance of each cluster
+        # cluster_variances = [np.var(cluster_data) for cluster_data in data_seconds]
+        cluster_variances = [np.var(cluster) for cluster in clusters]
+        # Compute the mean within-cluster variance
+        avg_cluster_variance = np.mean(cluster_variances)
+        # print(cluster_variances)
+        # print(f"Number of clusters: {k}, mean within-cluster variance: {avg_cluster_variance}")
+        # print("")
+        # Stop once the mean within-cluster variance falls below the threshold
+        if avg_cluster_variance < 30000000:
+            break
+        # Otherwise increase the number of clusters and try again
+        k += 1
+    return clusters
\ No newline at end of file
diff --git a/probability/clustering/src/service/date_operation.py b/probability/clustering/src/service/date_operation.py
new file mode 100644
index 0000000..ff7b246
--- /dev/null
+++ b/probability/clustering/src/service/date_operation.py
@@ -0,0 +1,44 @@
+import pandas as pd
+from datetime import datetime
+
+def identification_day(df: pd.DataFrame) -> pd.DataFrame:
+    df_date= df['date'].to_list()
+    # Get the weekday from each date and add it to the DataFrame
+    day: list[int] = []
+    for i in range(len(df_date)):
+        # Convert the date string to a datetime object
+        date_object = datetime.strptime(df_date[i], '%Y-%m-%d')
+        day.append(date_object.weekday())
+    df['day'] = day
+    # print(df)
+    return df
+
+
+def formatting_log(df: pd.DataFrame) -> pd.DataFrame:
+    # Convert the time strings to datetime
+    df['start_at'] = pd.to_datetime(df['start_at'])
+    df['end_at'] = pd.to_datetime(df['end_at'])
+
+    # For each date, take the first entry and the last exit of the day
+    result_data = []
+    grouped_data = df.groupby(df['start_at'].dt.date)
+    for date, group in grouped_data:
+        entry = group.loc[group['start_at'].idxmin()]
+        exit = group.loc[group['start_at'].idxmax()]
+        if entry['start_at'].date() != exit['end_at'].date():
+            result_data.append({
+                'user_id': entry['user_id'],
+                'date': date,
+                'entry': entry['start_at'].strftime('%H:%M:%S.%f')[:-3],
+                'exit': None
+            })
+        else:
+            result_data.append({
+                'user_id': entry['user_id'],
+                'date': date,
+                'entry': entry['start_at'].strftime('%H:%M:%S.%f')[:-3],
+                'exit': exit['end_at'].strftime('%H:%M:%S.%f')[:-3]
+            })
+    # Convert the result into a DataFrame and return it
+    result_df = pd.DataFrame(result_data)
+    return result_df
\ No newline at end of file
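A minimal smoke-test sketch for the new probability endpoint, separate from the change set above. It assumes the compose stack is up with PROBABILITY_PORT=8090 mapped to the API container, that cluster rows already exist for user_id=1, and it uses only the Python standard library; host, port and query values are placeholders to adjust.

```python
# Hypothetical check of the reporting/before route added in this change.
import json
import urllib.request

url = (
    "http://localhost:8090/app/probability/reporting/before"
    "?user_id=1&date=2024-01-01&time=10:00:00"
)
with urllib.request.urlopen(url) as resp:
    # Expected shape: {"userId": 1, "userName": "...", "probability": ...}
    print(json.load(resp))
```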