From 0fc2b5b06807200d0dc3e85b934c51ff60136e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Pi=C3=B1eyro?= Date: Wed, 12 Oct 2022 17:15:16 -0300 Subject: [PATCH] Storing demo data in home folder instead of local --- pyproject.toml | 4 ++-- vectory/__init__.py | 2 +- vectory/cli.py | 3 ++- vectory/db/models.py | 10 +++++----- vectory/demo.py | 12 +++--------- vectory/utils.py | 6 ++++++ vectory/visualization/main.py | 8 ++++++-- vectory/visualization/run.py | 1 - 8 files changed, 25 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5556820..961ab9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "vectory" -version = "0.1.4" +version = "0.1.5" description = "Streamline the benchmark and experimentation process of your models that rely on generating embeddings" authors = ["Pento "] readme = "README.md" @@ -32,7 +32,7 @@ classifiers= [ "Topic :: Software Development :: Libraries", ] -include = ["docker-compose.yml", "Dockerfile", ".dockerignore", "troubleshooting.md"] +include = ["docker-compose.yml", "Dockerfile", ".dockerignore", "TROUBLESHOOTING.md"] [tool.poetry.scripts] vectory = "vectory.cli:app" diff --git a/vectory/__init__.py b/vectory/__init__.py index 23aca2a..b63a917 100644 --- a/vectory/__init__.py +++ b/vectory/__init__.py @@ -8,6 +8,6 @@ from .demo import download_demo_data, prepare_demo_data # noqa from .es.client import ElasticKNNClient # noqa from .es.utils import load_csv_with_headers, load_embeddings_from_numpy # noqa -from .experiments import Experiment +from .experiments import Experiment # noqa from .indices import delete_index, list_indices, load_index, match_query # noqa from .spaces import EmbeddingSpace, compare_embedding_spaces # noqa diff --git a/vectory/cli.py b/vectory/cli.py index 8ddd33c..1982aad 100644 --- a/vectory/cli.py +++ b/vectory/cli.py @@ -24,6 +24,7 @@ from vectory.experiments import Experiment from vectory.indices import delete_index, list_indices, load_index from vectory.spaces import EmbeddingSpace, compare_embedding_spaces +from vectory.utils import get_vectory_dir from vectory.visualization.run import run_streamlit create_db_tables() @@ -77,7 +78,7 @@ def demo( ), ), data_path: Path = typer.Option( - "data/demo", + get_vectory_dir() / "demo", "--data-path", help="Path to the demo files", file_okay=False, diff --git a/vectory/db/models.py b/vectory/db/models.py index c33ea90..ce14bcf 100644 --- a/vectory/db/models.py +++ b/vectory/db/models.py @@ -4,13 +4,13 @@ from peewee import CharField, DateTimeField, IntegerField, Model from playhouse.sqlite_ext import ForeignKeyField, JSONField, SqliteExtDatabase +from vectory.utils import get_vectory_dir -default_folder_path = os.path.join(os.path.expanduser("~"), ".vectory") -db_path = os.path.join(default_folder_path, "main.db") +DB_PATH = get_vectory_dir() / "main.db" database = SqliteExtDatabase( - db_path, + DB_PATH, pragmas={ "journal_mode": "off", "synchronous": 0, @@ -216,8 +216,8 @@ def get_knn(embedding_space_name: str, metric: str): def create_db_tables(): - if not os.path.exists(default_folder_path): - os.makedirs(default_folder_path) + if not DB_PATH.parent.exists(): + os.makedirs(DB_PATH.parent, exist_ok=True) with database: database.create_tables( [ diff --git a/vectory/demo.py b/vectory/demo.py index d3b141f..305fd15 100644 --- a/vectory/demo.py +++ b/vectory/demo.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import List, Union -import requests +import requests # type: ignore import typer from vectory.datasets import Dataset from vectory.db.models import EmbeddingSpaceModel @@ -30,7 +30,7 @@ def download_demo_data( ) -> None: if not os.path.isdir(data_path): - os.makedirs(data_path) + os.makedirs(data_path, exist_ok=True) base_url = "https://github.com/pentoai/vectory/releases/download/v0.1.1/" @@ -71,13 +71,7 @@ def prepare_demo_data( dataset = Dataset.get_or_create( name=dataset_name, - csv_path=str( - Path(__file__).parent.parent - / "data" - / "demo" - / dataset_name - / f"{dataset_name}-data.csv" - ), + csv_path=str(Path(data_path) / f"{dataset_name}-data.csv"), id_field="_idx", ) typer.secho(f"Dataset {dataset_name} created", fg="yellow") diff --git a/vectory/utils.py b/vectory/utils.py index 2680f8a..c5732fa 100644 --- a/vectory/utils.py +++ b/vectory/utils.py @@ -1,5 +1,11 @@ +from pathlib import Path + from coolname import generate def generate_name(name: str, words=3) -> str: return "-".join([*generate(words), name]) + + +def get_vectory_dir() -> Path: + return Path.home() / ".vectory" diff --git a/vectory/visualization/main.py b/vectory/visualization/main.py index 7480664..092f20e 100644 --- a/vectory/visualization/main.py +++ b/vectory/visualization/main.py @@ -80,7 +80,7 @@ def selection(dataset): selected_emb_space, model=model, similarity=similarity ) - except: + except Exception: st.warning( "There isn't any loaded index from the embedding space " + f"'{selected_emb_space}', with model: '{model}' " @@ -450,7 +450,11 @@ def main(): most_similar_indices_2, scores_2 = calculate_indices( selected_vector, index_2 ) - intersection = set(most_similar_indices_1) & set(most_similar_indices_1) + + # intersection = ( + # set(most_similar_indices_1) & set(most_similar_indices_1) + # ) + col1, col2 = st.columns(2) if df_1 is not None: diff --git a/vectory/visualization/run.py b/vectory/visualization/run.py index c6c3ac5..da5f921 100644 --- a/vectory/visualization/run.py +++ b/vectory/visualization/run.py @@ -1,4 +1,3 @@ -import os from pathlib import Path from streamlit import config