Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

♻️ Simplify connect by eliminating db and storage arguments #865

Merged
merged 4 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 7 additions & 28 deletions docs/hub-cloud/02-connect-local-instance.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "b43d09d5-26d3-440d-b334-9643ec5248e6",
"metadata": {
Expand All @@ -20,12 +19,9 @@
},
"outputs": [],
"source": [
"# here, we construct a case in which the storage location of the previous instance was moved\n",
"!lamin load --unload\n",
"!lamin delete --force mydata\n",
"!lamin init --storage mydata\n",
"!rm -r ./mydata_new_loc\n",
"!mv mydata ./mydata_new_loc\n",
"!lamin load --unload"
]
},
Expand All @@ -40,7 +36,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1be836bb",
"metadata": {},
Expand All @@ -49,24 +44,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "a5c94e19",
"metadata": {},
"source": [
"If the user is the instance owner, load the instance by name:\n",
"```\n",
"ln_setup.connect(\"mydata\")\n",
"```"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c878d40a",
"metadata": {},
"source": [
"You can also load with a new default storage location:"
"If the user is the instance owner, load the instance by name:"
]
},
{
Expand All @@ -76,7 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"ln_setup.connect(\"mydata\", storage=\"./mydata_new_loc\")"
"ln_setup.connect(\"mydata\")"
]
},
{
Expand All @@ -94,18 +76,16 @@
"\n",
"assert ln_setup.settings.instance.storage.type_is_cloud == False\n",
"assert ln_setup.settings.instance.name == \"mydata\"\n",
"assert (\n",
" ln_setup.settings.instance.storage.root.as_posix()\n",
" == Path(\"./mydata_new_loc\").resolve().as_posix()\n",
")\n",
"\n",
"root_path = Path(\"./mydata\").resolve()\n",
"assert ln_setup.settings.instance.storage.root == root_path\n",
"assert (\n",
" ln_setup.settings.instance.db\n",
" == f\"sqlite:///{Path('./mydata_new_loc').resolve().as_posix()}/{ln_setup.settings.instance._id.hex}.lndb\"\n",
" == f\"sqlite:///{root_path.as_posix()}/{ln_setup.settings.instance._id.hex}.lndb\"\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "0986a5f3",
"metadata": {},
Expand All @@ -129,7 +109,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e3701120",
"metadata": {},
Expand Down Expand Up @@ -166,7 +145,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.9.17"
},
"vscode": {
"interpreter": {
Expand Down
97 changes: 29 additions & 68 deletions lamindb_setup/_connect_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,19 +185,28 @@ def _connect_instance(


@unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
def connect(
slug: str, *, db: str | None = None, storage: UPathStr | None = None, **kwargs
) -> str | tuple | None:
def connect(slug: str, **kwargs) -> str | tuple | None:
"""Connect to instance.
Args:
slug: The instance slug `account_handle/instance_name` or URL.
If the instance is owned by you, it suffices to pass the instance name.
db: Load the instance with an updated database URL.
storage: Load the instance with an updated default storage.
"""
isettings: InstanceSettings = None # type: ignore
# validate kwargs
valid_kwargs = {
"_db",
"_write_settings",
"_raise_not_found_error",
"_test",
"_user",
}
for kwarg in kwargs:
if kwarg not in valid_kwargs:
raise TypeError(f"connect() got unexpected keyword argument '{kwarg}'")

isettings: InstanceSettings = None # type: ignore
# _db is still needed because it is called in init
_db: str | None = kwargs.get("_db", None)
_write_settings: bool = kwargs.get("_write_settings", True)
_raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
_test: bool = kwargs.get("_test", False)
Expand Down Expand Up @@ -227,7 +236,9 @@ def connect(
close_instance(mute=True)

try:
isettings = _connect_instance(owner, name, db=db, access_token=access_token)
isettings = _connect_instance(
owner, name, db=_db, access_token=access_token
)
except InstanceNotFoundError as e:
if _raise_not_found_error:
raise e
Expand All @@ -239,8 +250,6 @@ def connect(
# _user is passed to lock cloud sqite for this user in isettings._load_db()
# has no effect if _user is None or if not cloud sqlite instance
isettings._locker_user = _user
if storage is not None:
update_isettings_with_storage(isettings, storage)
isettings._persist(write_to_disk=_write_settings)
if _test:
return None
Expand Down Expand Up @@ -268,8 +277,6 @@ def connect(
if _TEST_FAILED_LOAD:
raise RuntimeError("Technical testing error.")

if storage is not None and isettings.dialect == "sqlite":
update_root_field_in_default_storage(isettings)
# below is for backfilling the instance_uid value
# we'll enable it once more people migrated to 0.71.0
# ssettings_record = isettings.storage.record
Expand Down Expand Up @@ -300,6 +307,17 @@ def connect(
return None


def load(slug: str) -> str | tuple | None:
"""Connect to instance and set ``auto-connect`` to true.
This is exactly the same as ``ln.connect()`` except for that
``ln.connect()`` doesn't change the state of ``auto-connect``.
"""
result = connect(slug)
settings.auto_connect = True
return result


def migrate_lnschema_bionty(
isettings: InstanceSettings, no_lnschema_bionty_file: Path, write_file: bool = True
):
Expand Down Expand Up @@ -396,22 +414,6 @@ def migrate_lnschema_bionty(
conn.close()


def load(
slug: str,
*,
db: str | None = None,
storage: UPathStr | None = None,
) -> str | tuple | None:
"""Connect to instance and set ``auto-connect`` to true.
This is exactly the same as ``ln.connect()`` except for that
``ln.connect()`` doesn't change the state of ``auto-connect``.
"""
result = connect(slug, db=db, storage=storage)
settings.auto_connect = True
return result


def get_owner_name_from_identifier(identifier: str):
if "/" in identifier:
if identifier.startswith("https://lamin.ai/"):
Expand All @@ -427,44 +429,3 @@ def get_owner_name_from_identifier(identifier: str):
owner = settings.user.handle
name = identifier
return owner, name


def update_isettings_with_storage(
isettings: InstanceSettings, storage: UPathStr
) -> None:
ssettings = StorageSettings(storage)
if ssettings.type_is_cloud:
try: # triggering ssettings.id makes a lookup in the storage table
logger.success(f"loaded storage: {ssettings.id} / {ssettings.root_as_str}")
except RuntimeError as e:
logger.error(
"storage not registered!\n"
"load instance without the `storage` arg and register storage root: "
f"`lamin set storage --storage {storage}`"
)
raise e
else:
# local storage
# assumption is you want to merely update the storage location
isettings._storage = ssettings # need this here already
# update isettings in place
isettings._storage = ssettings


# this is different from register!
# register registers a new storage location
# update_root_field_in_default_storage updates the root
# field in the default storage locations
def update_root_field_in_default_storage(isettings: InstanceSettings):
from lnschema_core.models import Storage

storages = Storage.objects.all()
if len(storages) != 1:
raise RuntimeError(
"You have several storage locations: Can't identify in which storage"
" location the root column is to be updated!"
)
storage = storages[0]
storage.root = isettings.storage.root_as_str
storage.save()
logger.save(f"updated storage root {storage.id} to {isettings.storage.root_as_str}")
2 changes: 1 addition & 1 deletion lamindb_setup/_init_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def validate_init_args(
instance_slug = f"{owner_str}/{name_str}"
response = connect(
instance_slug,
db=db,
_db=db,
_raise_not_found_error=False,
_test=_test,
_write_settings=_write_settings,
Expand Down
11 changes: 8 additions & 3 deletions tests/hub-cloud/test_connect_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def test_connect_after_private_public_switch():


def test_connect_with_db_parameter():
# no more db parameter, it is _db now and is hidden in kwargs
# this also tests that only allowed kwargs can be used
with pytest.raises(TypeError):
ln_setup.connect("laminlabs/lamindata", db="some_db")

if os.getenv("LAMIN_ENV") == "prod":
# take a write-level access collaborator
ln_setup.login("testuser1")
Expand All @@ -113,7 +118,7 @@ def test_connect_with_db_parameter():
assert "root" in ln_setup.settings.instance.db
# test load from provided db argument
db = "postgresql://testdbuser:testpwd@database2.cmyfs24wugc3.us-east-1.rds.amazonaws.com:5432/db1"
ln_setup.connect("laminlabs/lamindata", db=db, _test=True)
ln_setup.connect("laminlabs/lamindata", _db=db, _test=True)
assert "testdbuser" in ln_setup.settings.instance.db
# test ignore loading from cache because hub result has >read access
ln_setup.connect("laminlabs/lamindata", _test=True)
Expand All @@ -125,7 +130,7 @@ def test_connect_with_db_parameter():
ln_setup.connect("laminlabs/lamindata", _test=True)
assert "root" in ln_setup.settings.instance.db
# now pass the connection string
ln_setup.connect("laminlabs/lamindata", db=db, _test=True)
ln_setup.connect("laminlabs/lamindata", _db=db, _test=True)
assert "testdbuser" in ln_setup.settings.instance.db
# now the cache is used
ln_setup.connect("laminlabs/lamindata", _test=True)
Expand All @@ -134,7 +139,7 @@ def test_connect_with_db_parameter():
# test corrupted input
db_corrupted = "postgresql://testuser:testpwd@wrongserver:5432/db1"
with pytest.raises(ValueError) as error:
ln_setup.connect("laminlabs/lamindata", db=db_corrupted, _test=True)
ln_setup.connect("laminlabs/lamindata", _db=db_corrupted, _test=True)
assert error.exconly().startswith(
"ValueError: The local differs from the hub database information"
)
2 changes: 1 addition & 1 deletion tests/storage/test_storage_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_connect_instance_with_private_storage_and_no_storage_access():
# storage during connection
ln_setup.connect(
"laminlabs/test-instance-private-postgres",
db=os.environ["TEST_INSTANCE_PRIVATE_POSTGRES"],
_db=os.environ["TEST_INSTANCE_PRIVATE_POSTGRES"],
_test=True,
)
# accessing storage in the instance should fail:
Expand Down
Loading