Skip to content

Commit

Permalink
♻️ Simplify connect by eliminating db and storage arguments (#865)
Browse files Browse the repository at this point in the history
* eliminate db and storage from connect

* fix tests

* fix

* test kwargs validation
  • Loading branch information
Koncopd authored Sep 25, 2024
1 parent 6498329 commit be78ca8
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 101 deletions.
35 changes: 7 additions & 28 deletions docs/hub-cloud/02-connect-local-instance.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "b43d09d5-26d3-440d-b334-9643ec5248e6",
"metadata": {
Expand All @@ -20,12 +19,9 @@
},
"outputs": [],
"source": [
"# here, we construct a case in which the storage location of the previous instance was moved\n",
"!lamin load --unload\n",
"!lamin delete --force mydata\n",
"!lamin init --storage mydata\n",
"!rm -r ./mydata_new_loc\n",
"!mv mydata ./mydata_new_loc\n",
"!lamin load --unload"
]
},
Expand All @@ -40,7 +36,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1be836bb",
"metadata": {},
Expand All @@ -49,24 +44,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "a5c94e19",
"metadata": {},
"source": [
"If the user is the instance owner, load the instance by name:\n",
"```\n",
"ln_setup.connect(\"mydata\")\n",
"```"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c878d40a",
"metadata": {},
"source": [
"You can also load with a new default storage location:"
"If the user is the instance owner, load the instance by name:"
]
},
{
Expand All @@ -76,7 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"ln_setup.connect(\"mydata\", storage=\"./mydata_new_loc\")"
"ln_setup.connect(\"mydata\")"
]
},
{
Expand All @@ -94,18 +76,16 @@
"\n",
"assert ln_setup.settings.instance.storage.type_is_cloud == False\n",
"assert ln_setup.settings.instance.name == \"mydata\"\n",
"assert (\n",
" ln_setup.settings.instance.storage.root.as_posix()\n",
" == Path(\"./mydata_new_loc\").resolve().as_posix()\n",
")\n",
"\n",
"root_path = Path(\"./mydata\").resolve()\n",
"assert ln_setup.settings.instance.storage.root == root_path\n",
"assert (\n",
" ln_setup.settings.instance.db\n",
" == f\"sqlite:///{Path('./mydata_new_loc').resolve().as_posix()}/{ln_setup.settings.instance._id.hex}.lndb\"\n",
" == f\"sqlite:///{root_path.as_posix()}/{ln_setup.settings.instance._id.hex}.lndb\"\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "0986a5f3",
"metadata": {},
Expand All @@ -129,7 +109,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e3701120",
"metadata": {},
Expand Down Expand Up @@ -166,7 +145,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.9.17"
},
"vscode": {
"interpreter": {
Expand Down
97 changes: 29 additions & 68 deletions lamindb_setup/_connect_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,19 +185,28 @@ def _connect_instance(


@unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
def connect(
slug: str, *, db: str | None = None, storage: UPathStr | None = None, **kwargs
) -> str | tuple | None:
def connect(slug: str, **kwargs) -> str | tuple | None:
"""Connect to instance.
Args:
slug: The instance slug `account_handle/instance_name` or URL.
If the instance is owned by you, it suffices to pass the instance name.
db: Load the instance with an updated database URL.
storage: Load the instance with an updated default storage.
"""
isettings: InstanceSettings = None # type: ignore
# validate kwargs
valid_kwargs = {
"_db",
"_write_settings",
"_raise_not_found_error",
"_test",
"_user",
}
for kwarg in kwargs:
if kwarg not in valid_kwargs:
raise TypeError(f"connect() got unexpected keyword argument '{kwarg}'")

isettings: InstanceSettings = None # type: ignore
# _db is still needed because it is called in init
_db: str | None = kwargs.get("_db", None)
_write_settings: bool = kwargs.get("_write_settings", True)
_raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
_test: bool = kwargs.get("_test", False)
Expand Down Expand Up @@ -227,7 +236,9 @@ def connect(
close_instance(mute=True)

try:
isettings = _connect_instance(owner, name, db=db, access_token=access_token)
isettings = _connect_instance(
owner, name, db=_db, access_token=access_token
)
except InstanceNotFoundError as e:
if _raise_not_found_error:
raise e
Expand All @@ -239,8 +250,6 @@ def connect(
# _user is passed to lock cloud sqite for this user in isettings._load_db()
# has no effect if _user is None or if not cloud sqlite instance
isettings._locker_user = _user
if storage is not None:
update_isettings_with_storage(isettings, storage)
isettings._persist(write_to_disk=_write_settings)
if _test:
return None
Expand Down Expand Up @@ -268,8 +277,6 @@ def connect(
if _TEST_FAILED_LOAD:
raise RuntimeError("Technical testing error.")

if storage is not None and isettings.dialect == "sqlite":
update_root_field_in_default_storage(isettings)
# below is for backfilling the instance_uid value
# we'll enable it once more people migrated to 0.71.0
# ssettings_record = isettings.storage.record
Expand Down Expand Up @@ -300,6 +307,17 @@ def connect(
return None


def load(slug: str) -> str | tuple | None:
"""Connect to instance and set ``auto-connect`` to true.
This is exactly the same as ``ln.connect()`` except for that
``ln.connect()`` doesn't change the state of ``auto-connect``.
"""
result = connect(slug)
settings.auto_connect = True
return result


def migrate_lnschema_bionty(
isettings: InstanceSettings, no_lnschema_bionty_file: Path, write_file: bool = True
):
Expand Down Expand Up @@ -396,22 +414,6 @@ def migrate_lnschema_bionty(
conn.close()


def load(
slug: str,
*,
db: str | None = None,
storage: UPathStr | None = None,
) -> str | tuple | None:
"""Connect to instance and set ``auto-connect`` to true.
This is exactly the same as ``ln.connect()`` except for that
``ln.connect()`` doesn't change the state of ``auto-connect``.
"""
result = connect(slug, db=db, storage=storage)
settings.auto_connect = True
return result


def get_owner_name_from_identifier(identifier: str):
if "/" in identifier:
if identifier.startswith("https://lamin.ai/"):
Expand All @@ -427,44 +429,3 @@ def get_owner_name_from_identifier(identifier: str):
owner = settings.user.handle
name = identifier
return owner, name


def update_isettings_with_storage(
isettings: InstanceSettings, storage: UPathStr
) -> None:
ssettings = StorageSettings(storage)
if ssettings.type_is_cloud:
try: # triggering ssettings.id makes a lookup in the storage table
logger.success(f"loaded storage: {ssettings.id} / {ssettings.root_as_str}")
except RuntimeError as e:
logger.error(
"storage not registered!\n"
"load instance without the `storage` arg and register storage root: "
f"`lamin set storage --storage {storage}`"
)
raise e
else:
# local storage
# assumption is you want to merely update the storage location
isettings._storage = ssettings # need this here already
# update isettings in place
isettings._storage = ssettings


# this is different from register!
# register registers a new storage location
# update_root_field_in_default_storage updates the root
# field in the default storage locations
def update_root_field_in_default_storage(isettings: InstanceSettings):
from lnschema_core.models import Storage

storages = Storage.objects.all()
if len(storages) != 1:
raise RuntimeError(
"You have several storage locations: Can't identify in which storage"
" location the root column is to be updated!"
)
storage = storages[0]
storage.root = isettings.storage.root_as_str
storage.save()
logger.save(f"updated storage root {storage.id} to {isettings.storage.root_as_str}")
2 changes: 1 addition & 1 deletion lamindb_setup/_init_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def validate_init_args(
instance_slug = f"{owner_str}/{name_str}"
response = connect(
instance_slug,
db=db,
_db=db,
_raise_not_found_error=False,
_test=_test,
_write_settings=_write_settings,
Expand Down
11 changes: 8 additions & 3 deletions tests/hub-cloud/test_connect_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def test_connect_after_private_public_switch():


def test_connect_with_db_parameter():
# no more db parameter, it is _db now and is hidden in kwargs
# this also tests that only allowed kwargs can be used
with pytest.raises(TypeError):
ln_setup.connect("laminlabs/lamindata", db="some_db")

if os.getenv("LAMIN_ENV") == "prod":
# take a write-level access collaborator
ln_setup.login("testuser1")
Expand All @@ -113,7 +118,7 @@ def test_connect_with_db_parameter():
assert "root" in ln_setup.settings.instance.db
# test load from provided db argument
db = "postgresql://testdbuser:testpwd@database2.cmyfs24wugc3.us-east-1.rds.amazonaws.com:5432/db1"
ln_setup.connect("laminlabs/lamindata", db=db, _test=True)
ln_setup.connect("laminlabs/lamindata", _db=db, _test=True)
assert "testdbuser" in ln_setup.settings.instance.db
# test ignore loading from cache because hub result has >read access
ln_setup.connect("laminlabs/lamindata", _test=True)
Expand All @@ -125,7 +130,7 @@ def test_connect_with_db_parameter():
ln_setup.connect("laminlabs/lamindata", _test=True)
assert "root" in ln_setup.settings.instance.db
# now pass the connection string
ln_setup.connect("laminlabs/lamindata", db=db, _test=True)
ln_setup.connect("laminlabs/lamindata", _db=db, _test=True)
assert "testdbuser" in ln_setup.settings.instance.db
# now the cache is used
ln_setup.connect("laminlabs/lamindata", _test=True)
Expand All @@ -134,7 +139,7 @@ def test_connect_with_db_parameter():
# test corrupted input
db_corrupted = "postgresql://testuser:testpwd@wrongserver:5432/db1"
with pytest.raises(ValueError) as error:
ln_setup.connect("laminlabs/lamindata", db=db_corrupted, _test=True)
ln_setup.connect("laminlabs/lamindata", _db=db_corrupted, _test=True)
assert error.exconly().startswith(
"ValueError: The local differs from the hub database information"
)
2 changes: 1 addition & 1 deletion tests/storage/test_storage_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_connect_instance_with_private_storage_and_no_storage_access():
# storage during connection
ln_setup.connect(
"laminlabs/test-instance-private-postgres",
db=os.environ["TEST_INSTANCE_PRIVATE_POSTGRES"],
_db=os.environ["TEST_INSTANCE_PRIVATE_POSTGRES"],
_test=True,
)
# accessing storage in the instance should fail:
Expand Down

0 comments on commit be78ca8

Please sign in to comment.