Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/depman gpufeat #517

Open
wants to merge 410 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
410 commits
Select commit Hold shift + click to select a range
3210019
test_text_utils deps check
dcolinmorgan Nov 24, 2023
abb999e
test_text_utils deps check
dcolinmorgan Nov 24, 2023
5192f79
typos
dcolinmorgan Nov 24, 2023
0d165dd
ignore type
dcolinmorgan Nov 24, 2023
032193a
lint
dcolinmorgan Nov 24, 2023
75207ce
lint
dcolinmorgan Nov 24, 2023
1f539f1
lint
dcolinmorgan Nov 24, 2023
219555b
lint
dcolinmorgan Nov 24, 2023
8b53e6d
lint
dcolinmorgan Nov 24, 2023
3380fa5
lint
dcolinmorgan Nov 24, 2023
c12ed7e
push test logic
dcolinmorgan Nov 24, 2023
ecdd72b
push test logic
dcolinmorgan Nov 24, 2023
181abfa
push test logic
dcolinmorgan Nov 24, 2023
703e923
push test logic
dcolinmorgan Nov 24, 2023
5d7f750
lint
dcolinmorgan Nov 24, 2023
849baae
lint
dcolinmorgan Nov 24, 2023
6935a91
lint
dcolinmorgan Nov 24, 2023
c1f94c2
lint
dcolinmorgan Nov 24, 2023
eeaef0b
dep_flag lint
dcolinmorgan Nov 24, 2023
8d4c1df
assert logic
dcolinmorgan Nov 24, 2023
37ea918
lint
dcolinmorgan Nov 27, 2023
8e32e0c
lint
dcolinmorgan Nov 27, 2023
1f5f243
lint
dcolinmorgan Nov 27, 2023
20430e0
lint
dcolinmorgan Nov 27, 2023
a3bb113
remove conditional
dcolinmorgan Nov 27, 2023
9528e4a
sklearn assert
dcolinmorgan Nov 27, 2023
d170ace
sklearn assert
dcolinmorgan Nov 27, 2023
6a508c4
sklearn assert
dcolinmorgan Nov 27, 2023
f5812bd
sklearn assert
dcolinmorgan Nov 27, 2023
976d1dd
cumml _v_ test
dcolinmorgan Nov 27, 2023
2faf466
cumml _v_ test
dcolinmorgan Nov 27, 2023
2c96419
lint
dcolinmorgan Nov 27, 2023
ab73859
lint
dcolinmorgan Nov 27, 2023
a379787
lint
dcolinmorgan Nov 27, 2023
580ef32
lint
dcolinmorgan Nov 27, 2023
2c35bb2
lint
dcolinmorgan Nov 27, 2023
3d5aa45
lint
dcolinmorgan Nov 27, 2023
260c3b7
remove two too precise tests
dcolinmorgan Nov 27, 2023
23e4257
lint
dcolinmorgan Nov 27, 2023
c6417f9
lint
dcolinmorgan Nov 27, 2023
457ef7a
lint
dcolinmorgan Nov 27, 2023
69e59e7
add sklearn to core dep
dcolinmorgan Nov 27, 2023
6977d67
add sklearn to core dep
dcolinmorgan Nov 27, 2023
bba6c00
add sklearn to core dep
dcolinmorgan Nov 27, 2023
533a750
add sklearn+umap to core dep
dcolinmorgan Nov 27, 2023
20b1f16
add sklearn+umap to core dep
dcolinmorgan Nov 27, 2023
dd23f25
add sklearn+umap to core dep
dcolinmorgan Nov 27, 2023
3b59258
add scipy, dc to core dep
dcolinmorgan Nov 27, 2023
5e63074
add scipy, dc to core dep
dcolinmorgan Nov 27, 2023
6db86a3
revert to working
dcolinmorgan Nov 27, 2023
42f6a75
Merge branch 'dev/dep_man' of https://github.com/graphistry/pygraphis…
dcolinmorgan Nov 27, 2023
aadc84b
clsoe
dcolinmorgan Nov 27, 2023
edbdf37
remove has_
dcolinmorgan Nov 27, 2023
0ec47bb
np.all to allclose
dcolinmorgan Nov 27, 2023
139f7f9
lint
dcolinmorgan Nov 27, 2023
3223a27
revert allclose
dcolinmorgan Nov 27, 2023
c47df98
drop assert
dcolinmorgan Nov 27, 2023
26cd5e9
drop assert
dcolinmorgan Nov 27, 2023
e47fa35
drop assert
dcolinmorgan Nov 27, 2023
d8f9e6d
lint
dcolinmorgan Nov 27, 2023
1b9f32e
Merge branch 'master' into dev/depman_gpufeat
dcolinmorgan Nov 28, 2023
2751aa9
update merge gpu_feat+dep_man
dcolinmorgan Nov 28, 2023
9896f82
lint
dcolinmorgan Nov 28, 2023
64153ab
lint
dcolinmorgan Nov 28, 2023
d86ef4e
lint
dcolinmorgan Nov 28, 2023
c370598
lint
dcolinmorgan Nov 28, 2023
a3ea5d0
add cu_cat to ai extra deps
dcolinmorgan Nov 29, 2023
30ca9ee
update cu_cat version with dep_man
dcolinmorgan Nov 30, 2023
d0997b4
if cudf add to test
dcolinmorgan Dec 1, 2023
836a9f4
use cc cpu not dc
dcolinmorgan Dec 1, 2023
6646b73
use cc cpu not dc
dcolinmorgan Dec 1, 2023
cf74443
use cc cpu not dc
dcolinmorgan Dec 1, 2023
da72b63
lint cc not dc
dcolinmorgan Dec 1, 2023
b062d59
lint cc not dc
dcolinmorgan Dec 1, 2023
e0e401e
lint cc not dc
dcolinmorgan Dec 1, 2023
2a0a9af
better setup install for cucat
dcolinmorgan Dec 1, 2023
05a1329
better setup install for cucat
dcolinmorgan Dec 1, 2023
760687e
better setup install for cucat
dcolinmorgan Dec 1, 2023
ad2c703
lint
dcolinmorgan Dec 1, 2023
454331a
lint
dcolinmorgan Dec 1, 2023
6e1cd20
test dataset
dcolinmorgan Dec 1, 2023
8289d51
test dataset
dcolinmorgan Dec 1, 2023
1573e1c
test dataset
dcolinmorgan Dec 1, 2023
14edf7b
test dataset
dcolinmorgan Dec 1, 2023
565e9ac
lint
dcolinmorgan Dec 1, 2023
c97c204
assert swap
dcolinmorgan Dec 1, 2023
4eb824f
assert swap
dcolinmorgan Dec 1, 2023
070c576
assert swap
dcolinmorgan Dec 1, 2023
1c73235
assert swap
dcolinmorgan Dec 1, 2023
d53a306
update tests with depman
dcolinmorgan Dec 4, 2023
1904df5
respond to most comments
dcolinmorgan Dec 4, 2023
a9d3d9e
respond to most comments
dcolinmorgan Dec 4, 2023
0dd4ed6
respond to most comments
dcolinmorgan Dec 4, 2023
6007eb7
respond to tqdm, <2 column comments
dcolinmorgan Dec 5, 2023
6d0cb1c
respond to tqdm, <2 column comments
dcolinmorgan Dec 5, 2023
86378eb
respond to tqdm, <2 column comments
dcolinmorgan Dec 5, 2023
5b36dd0
respond to tqdm
dcolinmorgan Dec 5, 2023
90ca97a
Merge branch 'master' into dev/dep_man
dcolinmorgan Dec 5, 2023
08de406
tqdm set_descr error
dcolinmorgan Dec 5, 2023
b236337
tqdm set_descr error
dcolinmorgan Dec 5, 2023
85e1e24
tqdm not trange has "set_description"
dcolinmorgan Dec 5, 2023
c86cb53
tqdm not trange has "set_description"
dcolinmorgan Dec 5, 2023
5d5146f
tqdm not trange has "set_description"
dcolinmorgan Dec 5, 2023
8640971
tqdm.tqdm
dcolinmorgan Dec 5, 2023
58d9810
tqdm.tqdm
dcolinmorgan Dec 5, 2023
d02d480
fallback to lazy import
dcolinmorgan Dec 5, 2023
a39928c
fallback to lazy import
dcolinmorgan Dec 5, 2023
cedd9ad
half lazy import
dcolinmorgan Dec 5, 2023
dcfdd9c
smart import
dcolinmorgan Dec 5, 2023
cc8c4d2
smart import
dcolinmorgan Dec 5, 2023
79045df
smart import
dcolinmorgan Dec 5, 2023
7bb1cc9
merge dep_man/master
dcolinmorgan Dec 5, 2023
0e4b19d
lint
dcolinmorgan Dec 5, 2023
f7e97df
asser cucat logic
dcolinmorgan Dec 7, 2023
0372b7c
asser cucat logic
dcolinmorgan Dec 7, 2023
3e7f0e0
base install cucat (move to [ai])
dcolinmorgan Dec 7, 2023
3eff36e
install cucat to extra-heavy
dcolinmorgan Dec 7, 2023
fb9d37c
wow typo cu-cat
dcolinmorgan Dec 7, 2023
0ac9516
cu_cat dep/vers install
dcolinmorgan Dec 8, 2023
2326237
cu_cat dep/vers install
dcolinmorgan Dec 8, 2023
56a0e73
cu_cat full replace dc
dcolinmorgan Dec 8, 2023
ca2e7bf
assert cucat fallback
dcolinmorgan Dec 8, 2023
5fb1f28
better warning url
dcolinmorgan Dec 8, 2023
8a6008a
better warning url
dcolinmorgan Dec 8, 2023
9a364a7
all safe dfs
dcolinmorgan Dec 18, 2023
1ad8e96
all safe dfs
dcolinmorgan Dec 18, 2023
0f14d99
Merge branch 'master' into dev/depman_gpufeat
dcolinmorgan Dec 18, 2023
17beba0
edge concat interop + dc + cudf interop pd
dcolinmorgan Dec 22, 2023
5ec85fd
Revert "edge concat interop + dc + cudf interop pd"
dcolinmorgan Jan 3, 2024
1386f0b
+assert error +dc default
dcolinmorgan Jan 3, 2024
8bf48e5
+assert error +dc default
dcolinmorgan Jan 3, 2024
69b5f3f
dc_only_feature_test
dcolinmorgan Jan 3, 2024
3bc04fa
cupyx csr toarray for features_out
dcolinmorgan Jan 4, 2024
1544927
cupyx csr toarray for features_out
dcolinmorgan Jan 4, 2024
495c031
cupyx csr toarray for features_out
dcolinmorgan Jan 4, 2024
8a41d10
add gpu-umap test, allow cucat to test w/o gpu
dcolinmorgan Jan 4, 2024
26b4f94
add gpu-umap test, allow cucat to test w/o gpu
dcolinmorgan Jan 4, 2024
707b404
dirty_cat version with Table&SuperVectorizer
dcolinmorgan Jan 4, 2024
93c4021
better dimension try
dcolinmorgan Jan 5, 2024
bef055e
soln for gmem lim
dcolinmorgan Jan 12, 2024
bb4e67a
soln for gmem lim
dcolinmorgan Jan 12, 2024
8241a1a
soln for gmem lim
dcolinmorgan Jan 15, 2024
c8421ef
remove gpu-cucat test
dcolinmorgan Jan 19, 2024
5a65b51
req sklearn==1.3.2 for now
dcolinmorgan Jan 19, 2024
569d09f
more cudf acrobatics, deal with duplicate colnames
dcolinmorgan Jan 19, 2024
1fb98c0
more cudf acrobatics, deal with duplicate colnames
dcolinmorgan Jan 19, 2024
e62c8ab
tweaks for gpufeat, still issues with coo matrix scaling
dcolinmorgan Jan 22, 2024
773ba7d
Update feature_utils.py
dcolinmorgan Jan 22, 2024
7901010
tweaks for scaling after featurization
dcolinmorgan Jan 22, 2024
f857d2f
better interop with cu_cat
dcolinmorgan Jan 23, 2024
ba28dd0
Update test_feature_utils.py
dcolinmorgan Jan 23, 2024
00b1e88
better interop with cu-cat
dcolinmorgan Jan 23, 2024
9250f44
better interop with cu-cat
dcolinmorgan Jan 23, 2024
916bf4c
pyg+cucat tests passing
dcolinmorgan Jan 24, 2024
f4b8ed8
pyg cucat+umap tests closer
dcolinmorgan Jan 24, 2024
ee181c2
rollback for feat pytest, constants working
dcolinmorgan Jan 24, 2024
67e4732
lint
dcolinmorgan Jan 24, 2024
0504e2b
feats tests pass, many umap
dcolinmorgan Jan 25, 2024
ee08701
more test tweaks to handle cupy/cudf comparisons
dcolinmorgan Jan 25, 2024
974f800
more test tweaks to handle cupy/cudf comparisons
dcolinmorgan Jan 26, 2024
5fe7b87
more tweaks
dcolinmorgan Jan 26, 2024
8793913
safe gpu umap tweaks
dcolinmorgan Jan 29, 2024
c40ad22
safe gpu umap tweaks
dcolinmorgan Jan 29, 2024
31e2a41
closer to umap full pass
dcolinmorgan Jan 30, 2024
b00ab9b
more cudf df tries
dcolinmorgan Jan 30, 2024
462ae91
full umap pass
dcolinmorgan Jan 30, 2024
aede506
full umap pass
dcolinmorgan Jan 30, 2024
5ba3a83
lint
dcolinmorgan Jan 30, 2024
ba4a398
lint
dcolinmorgan Jan 30, 2024
19d7f46
lint
dcolinmorgan Jan 30, 2024
b90bb8b
!=0 > empty, safe cupy umap
dcolinmorgan Feb 2, 2024
82d537e
type error tweak
dcolinmorgan Feb 5, 2024
58d463b
type error tweak
dcolinmorgan Feb 5, 2024
1546db1
type error tweak
dcolinmorgan Feb 5, 2024
0b7dc9f
lint
dcolinmorgan Feb 5, 2024
7f72d09
general deduplicates handle ndf_reddit
dcolinmorgan Feb 5, 2024
f87982a
hardcode ndf_reddit duplicate squashing
dcolinmorgan Feb 5, 2024
ce3f089
tweaks to appease cudf
dcolinmorgan Feb 5, 2024
26f1621
lint
dcolinmorgan Feb 5, 2024
0c046bb
lint
dcolinmorgan Feb 5, 2024
ca7ab4a
lint
dcolinmorgan Feb 5, 2024
52a1216
lint
dcolinmorgan Feb 5, 2024
2d49231
lint
dcolinmorgan Feb 5, 2024
f871e10
remove test umap copy()
dcolinmorgan Feb 5, 2024
cc90767
for umapai test pass
dcolinmorgan Feb 5, 2024
5c66802
ai patch for n_comp>2
dcolinmorgan Feb 6, 2024
9103a7c
parameterize feature_engine tests
dcolinmorgan Feb 7, 2024
63ad9ae
lint
dcolinmorgan Feb 7, 2024
b8c28aa
handle feat_eng via test params
dcolinmorgan Feb 7, 2024
85f1a70
lint
dcolinmorgan Feb 7, 2024
e7b8137
small cc v dc tweaks
dcolinmorgan Feb 8, 2024
a771fd6
missing parameterized tests
dcolinmorgan Feb 8, 2024
1d4723d
Merge remote-tracking branch 'origin/master' into dev/depman_gpufeat
dcolinmorgan Feb 8, 2024
9db0dd4
update cu-cat version
dcolinmorgan Feb 8, 2024
68fd472
test tweaks
dcolinmorgan Feb 9, 2024
d10655a
test tweaks
dcolinmorgan Feb 9, 2024
5180b09
remove auto
dcolinmorgan Feb 9, 2024
d1fe703
lint
dcolinmorgan Feb 14, 2024
e9a0a68
better cudf passif, test hack
dcolinmorgan Feb 14, 2024
c7a7676
towards better feat-eng concrete
dcolinmorgan Feb 15, 2024
9f095e1
lint
dcolinmorgan Feb 16, 2024
8a67f27
concreting
dcolinmorgan Feb 16, 2024
23a2f91
concreted again
dcolinmorgan Feb 16, 2024
283086c
lint
dcolinmorgan Feb 16, 2024
b78fc6a
test lint
dcolinmorgan Feb 16, 2024
c7c715e
auto engine back
dcolinmorgan Feb 16, 2024
03f0fc3
umap test engine inject
dcolinmorgan Feb 19, 2024
50562b7
umap test engine inject
dcolinmorgan Feb 19, 2024
75ac2b0
umap test engine inject
dcolinmorgan Feb 19, 2024
1f13df5
feat(gfql): export alias e
lmeyerov Feb 23, 2024
0a8efbf
wip(telemetry)
lmeyerov Feb 23, 2024
af5c1bc
test(chain): add failing gfql tests
lmeyerov Feb 23, 2024
30d64b2
fix(hop): debugging_hop=False in prod
lmeyerov Feb 24, 2024
167513d
fix(hop): debugging_hop=False in prod
lmeyerov Feb 24, 2024
66948c0
fix(hop): debugging_hop=False in prod
lmeyerov Feb 24, 2024
8ff98fa
fix(GFQL): some shorest path queries
lmeyerov Feb 24, 2024
5f02c49
garden(gfql): more logs
lmeyerov Feb 24, 2024
95836a1
fix(ci): work around ai fails via test env pinning
lmeyerov Feb 24, 2024
542416f
fix(deps): more dirty cat and umap env handling
lmeyerov Feb 24, 2024
1cb9020
fix(lint)
lmeyerov Feb 24, 2024
ff38bcc
fix(types)
lmeyerov Feb 24, 2024
4d493a9
fix(dirty_cat): missing import
lmeyerov Feb 24, 2024
4272d23
fix(lint)
lmeyerov Feb 24, 2024
795e6d1
docs(changelog); version
lmeyerov Feb 25, 2024
d2728e8
docs(publish): correct flow
lmeyerov Feb 25, 2024
6cb44ab
docs(0.33.2): bump for readthedocs resync
lmeyerov Feb 25, 2024
e41ab49
Merge branch 'master' into dev/depman_gpufeat
dcolinmorgan Feb 27, 2024
ff94946
update feature engine check
dcolinmorgan Feb 27, 2024
9c73438
type ignore
dcolinmorgan Feb 27, 2024
581df16
depman>lazy
dcolinmorgan Feb 27, 2024
993b9fe
edge determine engine logic fix
tanmoyio Feb 27, 2024
a1b61ac
euclidean is default, so comment out
dcolinmorgan Feb 27, 2024
b57978e
euclidean is default, so comment out
dcolinmorgan Feb 27, 2024
2d8ca8b
remove dup
dcolinmorgan Feb 27, 2024
e620baf
remove dup
dcolinmorgan Feb 27, 2024
4e7c9b5
euclidean for dirty_cat only since default for cucat
dcolinmorgan Feb 27, 2024
dcff47c
docker test fix
dcolinmorgan Feb 27, 2024
83f55f9
docker test fix
dcolinmorgan Feb 27, 2024
b193706
tweaks
dcolinmorgan Mar 1, 2024
8e2999a
more param to umap tests, last test= cuml V umap
dcolinmorgan Mar 1, 2024
12914c4
lint
dcolinmorgan Mar 1, 2024
db380ef
lint
dcolinmorgan Mar 1, 2024
3ebea98
lint
dcolinmorgan Mar 1, 2024
6faaa68
lint
dcolinmorgan Mar 1, 2024
bb4b994
lint
dcolinmorgan Mar 1, 2024
cb2b08a
more param line reduction
dcolinmorgan Mar 1, 2024
d3684f5
lint
dcolinmorgan Mar 1, 2024
f0b23f4
Merge branch 'master' into dev/depman_gpufeat
dcolinmorgan May 23, 2024
6d7df64
devman update for test
dcolinmorgan May 23, 2024
3462b97
replace try with specific ifs
dcolinmorgan Jul 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ jobs:
source pygraphistry/bin/activate
./bin/test-umap-learn-core.sh


test-full-ai:

needs: [ test-minimal-python ]
Expand Down
1 change: 0 additions & 1 deletion docker/test-gpu-local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,4 @@ docker run \
${NETWORK} \
graphistry/test-gpu:${TEST_CPU_VERSION} \
--maxfail=1 \
--ignore=graphistry/tests/test_feature_utils.py \
$@
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
('py:class', 'umap'),
('py:class', 'sentence_transformers'),
('py:class', 'dirty_cat'),
('py:class', 'cu_cat'),
('py:class', 'sklearn'),
('py:class', 'scipy'),
('py:class', 'seaborn'),
Expand Down
16 changes: 13 additions & 3 deletions graphistry/ai_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd
import numpy as np

from inspect import getmodule
import graphistry

from .constants import DISTANCE, WEIGHT, BATCH
Expand Down Expand Up @@ -422,7 +422,10 @@ def infer_self_graph(res,
assert (
emb.shape[0] == df.shape[0]
), "minibatches emb and X must have same number of rows since h(df) = emb"
df = df.assign(x=emb.x, y=emb.y) # add x and y to df for graphistry instance
if emb.x is not None:
df = df.assign(x=emb.x, y=emb.y) # add x and y to df for graphistry instance
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of try/catch, can we do value inspection?

else:
df = df.assign(x=emb[0], y=emb[1]) # if umap kwargs n_components > 2, take first 2 here
else: # if umap has been fit, but only transforming over features, need to add x and y or breaks plot binds of res
df['x'] = np.random.random(df.shape[0])
df['y'] = np.random.random(df.shape[0])
Expand All @@ -447,7 +450,14 @@ def infer_self_graph(res,

for i in range(X_new.shape[0]):
diff = X_previously_fit - X_new.iloc[i, :]
dist = np.linalg.norm(diff, axis=1) # Euclidean distance
try:
diff = np.array(diff, dtype = 'float')
except TypeError:
pass
if 'pandas' in str(getmodule(diff)):
dist = np.linalg.norm(diff, axis=1) # Euclidean distance
else:
dist = np.linalg.norm(diff.to_pandas(), axis=1) # Euclidean distance
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same, instead of try/catch, can we do value inspection?

mdists.append(dist)

m, std = np.mean(mdists), np.std(mdists)
Expand Down
4 changes: 1 addition & 3 deletions graphistry/compute/ComputeMixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,7 @@ def materialize_nodes(
import cudf
if isinstance(g._edges, cudf.DataFrame):
engine_concrete = Engine.CUDF
except ImportError:
pass
if engine == EngineAbstract.AUTO:
except:
raise ValueError('Could not determine engine for edges, expected pandas or cudf dataframe, got: {}'.format(type(g._edges)))
else:
engine_concrete = Engine(engine.value)
Expand Down
1 change: 1 addition & 0 deletions graphistry/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
# for preprocessors namespace
# for dirty_cat params
DIRTY_CAT = "dirty_cat"
CUDA_CAT = "cu_cat"
N_TOPICS_DEFAULT = 42
N_TOPICS_TARGET_DEFAULT = 7
N_HASHERS_DEFAULT = 100
Expand Down
30 changes: 30 additions & 0 deletions graphistry/dep_manager.py
Copy link
Contributor Author

@dcolinmorgan dcolinmorgan Jul 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lmeyerov I will back this out everywhere (not too pervasive in overhaul yet, 10 files including tests), but it's pretty simple, works well, and gets rid of instances like in embed_utils where you end up using the lazy imports almost comically throughout, so I can put this into a separate PR, only to be reunited one day far off

_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
_, _, _, dgl, _, _, _, _ = lazy_embed_import_dep()

Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import importlib
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from importlib import __import__, import_module ?


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this class benefits from a comment on its design and the problem it solves

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ex: maybe it does a fast & cached check of a package being in the path before actually importing, so there should be a testable and observable speedup?

class DepManager:
    """Lazy, cached importer for optional dependencies.

    Attribute access (e.g. ``deps.torch``) attempts to import the package
    of that name, caching the module on success so later lookups are cheap.
    A missing package yields ``None`` instead of raising, letting callers
    feature-detect optional dependencies with ``if deps.torch: ...``.
    """

    def __init__(self):
        # package name -> imported module; absent key means the import
        # has not succeeded (yet)
        self.pkgs = {}

    def __getattr__(self, pkg: str):
        # Python only calls __getattr__ for names not found normally, so
        # once _add_deps has setattr'd a module, this path is bypassed.
        self._add_deps(pkg)
        # None signals "not installed" to callers (deliberately no raise)
        return self.pkgs.get(pkg)

    def _add_deps(self, pkg: str):
        # Import and cache `pkg`; swallow import-time failures so a
        # missing optional dependency reads as None rather than crashing.
        # Exception (not bare except) so KeyboardInterrupt/SystemExit
        # still propagate.
        try:
            pkg_val = importlib.import_module(pkg)
            self.pkgs[pkg] = pkg_val
            setattr(self, pkg, pkg_val)
        except Exception:
            pass

    def import_from(self, pkg: str, name: str):
        """Equivalent of ``from pkg import name``, cached under ``name``.

        Uses importlib.import_module (recommended over ``__import__``)
        and stores the named member itself — not the containing module —
        so ``deps.pkgs[name]`` is directly usable.
        """
        try:
            module = importlib.import_module(pkg)
            self.pkgs[name] = getattr(module, name)
        except Exception:
            pass


# Shared singleton: import once, reuse everywhere.
deps = DepManager()
48 changes: 24 additions & 24 deletions graphistry/dgl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)

from .util import setup_logger

from .dep_manager import deps

if TYPE_CHECKING:
import scipy
Expand All @@ -34,30 +34,29 @@
MIXIN_BASE = object


def lazy_dgl_import_has_dependency():
try:
import warnings
warnings.filterwarnings('ignore')
import dgl # noqa: F811
return True, 'ok', dgl
except ModuleNotFoundError as e:
return False, e, None
# def lazy_dgl_import_has_dependency():
# try:
# import warnings
# warnings.filterwarnings('ignore')
# import dgl # noqa: F811
# return True, 'ok', dgl
# except ModuleNotFoundError as e:
# return False, e, None


def lazy_torch_import_has_dependency():
try:
import warnings
warnings.filterwarnings('ignore')
import torch # noqa: F811
return True, 'ok', torch
except ModuleNotFoundError as e:
return False, e, None
# def lazy_torch_import_has_dependency():
# try:
# import warnings
# warnings.filterwarnings('ignore')
# import torch # noqa: F811
# return True, 'ok', torch
# except ModuleNotFoundError as e:
# return False, e, None


logger = setup_logger(name=__name__)



# #########################################################################################
#
# Torch helpers
Expand All @@ -73,7 +72,7 @@ def convert_to_torch(X_enc: pd.DataFrame, y_enc: Optional[pd.DataFrame]): # typ
:param y_enc: DataFrame Matrix of Values for Target
:return: Dictionary of torch encoded arrays
"""
_, _, torch = lazy_torch_import_has_dependency() # noqa: F811
torch = deps.torch # noqa: F811

if not y_enc.empty: # type: ignore
data = {
Expand All @@ -98,7 +97,7 @@ def get_available_devices():
device (torch.device): Main device (GPU 0 or CPU).
gpu_ids (list): List of IDs of all GPUs that are available.
"""
_, _, torch = lazy_torch_import_has_dependency() # noqa: F811
torch = deps.torch # noqa: F811

gpu_ids = []
if torch.cuda.is_available():
Expand Down Expand Up @@ -181,7 +180,8 @@ def pandas_to_dgl_graph(
sp_mat: sparse scipy matrix
ordered_nodes_dict: dict ordered from most common src and dst nodes
"""
_, _, dgl = lazy_dgl_import_has_dependency() # noqa: F811
dgl = deps.dgl # noqa: F811

sp_mat, ordered_nodes_dict = pandas_to_sparse_adjacency(df, src, dst, weight_col)
g = dgl.from_scipy(sp_mat, device=device) # there are other ways too
logger.info(f"Graph Type: {type(g)}")
Expand All @@ -196,7 +196,7 @@ def get_torch_train_test_mask(n: int, ratio: float = 0.8):
:param ratio: mimics train/test split. `ratio` sets number of True vs False mask entries.
:return: train and test torch tensor masks
"""
_, _, torch = lazy_torch_import_has_dependency() # noqa: F811
torch = deps.torch # noqa: F811

train_mask = torch.zeros(n, dtype=torch.bool).bernoulli(ratio)
test_mask = ~train_mask
Expand Down Expand Up @@ -225,8 +225,8 @@ def dgl_lazy_init(self, train_split: float = 0.8, device: str = "cpu"):
"""

if not self.dgl_initialized:
lazy_dgl_import_has_dependency()
lazy_torch_import_has_dependency()
deps.dgl
deps.torch
self.train_split = train_split
self.device = device
self._removed_edges_previously = False
Expand Down
70 changes: 30 additions & 40 deletions graphistry/embed_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,22 @@
import numpy as np
import pandas as pd
from typing import Optional, Union, Callable, List, TYPE_CHECKING, Any, Tuple

from inspect import getmodule
from .PlotterBase import Plottable
from .compute.ComputeMixin import ComputeMixin
from .dep_manager import deps


def lazy_embed_import_dep():
try:
import torch
import torch.nn as nn
import dgl
from dgl.dataloading import GraphDataLoader
import torch.nn.functional as F
from .networks import HeteroEmbed
from tqdm import trange
return True, torch, nn, dgl, GraphDataLoader, HeteroEmbed, F, trange

except:
return False, None, None, None, None, None, None, None

def check_cudf():
try:
import cudf
return True, cudf
except:
return False, object


if TYPE_CHECKING:
_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
torch = deps.torch
TT = torch.Tensor
MIXIN_BASE = ComputeMixin
else:
TT = Any
MIXIN_BASE = object
torch = Any

has_cudf, cudf = check_cudf()
cudf = deps.cudf

XSymbolic = Optional[Union[List[str], str, pd.DataFrame]]
ProtoSymbolic = Optional[Union[str, Callable[[TT, TT, TT], TT]]] # type: ignore
Expand Down Expand Up @@ -99,8 +78,7 @@ def __init__(self):
self._device = "cpu"

def _preprocess_embedding_data(self, res, train_split:Union[float, int] = 0.8) -> Plottable:
#_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
import torch
torch = deps.torch
log('Preprocessing embedding data')
src, dst = res._source, res._destination
relation = res._relation
Expand Down Expand Up @@ -147,7 +125,7 @@ def _preprocess_embedding_data(self, res, train_split:Union[float, int] = 0.8) -
return res

def _build_graph(self, res) -> Plottable:
_, _, _, dgl, _, _, _, _ = lazy_embed_import_dep()
dgl = deps.dgl
s, r, t = res._triplets.T

if res._train_idx is not None:
Expand All @@ -169,7 +147,10 @@ def _build_graph(self, res) -> Plottable:


def _init_model(self, res, batch_size:int, sample_size:int, num_steps:int, device):
_, _, _, _, GraphDataLoader, HeteroEmbed, _, _ = lazy_embed_import_dep()
dgl_ = deps.dgl
if dgl_:
from dgl.dataloading import GraphDataLoader
from .networks import HeteroEmbed
g_iter = SubgraphIterator(res._kg_dgl, sample_size, num_steps)
g_dataloader = GraphDataLoader(
g_iter, batch_size=batch_size, collate_fn=lambda x: x[0]
Expand All @@ -186,9 +167,11 @@ def _init_model(self, res, batch_size:int, sample_size:int, num_steps:int, devic
)

return model, g_dataloader

def _train_embedding(self, res, epochs:int, batch_size:int, lr:float, sample_size:int, num_steps:int, device) -> Plottable:
_, torch, nn, _, _, _, _, trange = lazy_embed_import_dep()
torch = deps.torch
nn = deps.torch.nn
trange = deps.tqdm.trange
log('Training embedding')
model, g_dataloader = res._init_model(res, batch_size, sample_size, num_steps, device)
if hasattr(res, "_embed_model") and not res._build_new_embedding_model:
Expand Down Expand Up @@ -232,7 +215,7 @@ def _train_embedding(self, res, epochs:int, batch_size:int, lr:float, sample_siz

@property
def _gcn_node_embeddings(self):
_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
torch = deps.torch
g_dgl = self._kg_dgl.to(self._device)
em = self._embed_model(g_dgl).detach()
torch.cuda.empty_cache()
Expand Down Expand Up @@ -301,12 +284,12 @@ def embed(
"""
# this is temporary, will be fixed in future releases
try:
if isinstance(self._nodes, cudf.DataFrame):
if 'cudf' in str(getmodule(self._nodes)):
self._nodes = self._nodes.to_pandas()
except:
pass
try:
if isinstance(self._edges, cudf.DataFrame):
if 'cudf' in str(getmodule(self._edges)):
self._edges = self._edges.to_pandas()
except:
pass
Expand Down Expand Up @@ -436,7 +419,7 @@ def predict_links(
else:
# this is temporary, will be removed after gpu feature utils
try:
if isinstance(source, cudf.DataFrame):
if 'cudf' in str(getmodule(source)):
source = source.to_pandas() # type: ignore
except:
pass
Expand All @@ -448,7 +431,7 @@ def predict_links(
else:
# this is temporary, will be removed after gpu feature utils
try:
if isinstance(relation, cudf.DataFrame):
if 'cudf' in str(getmodule(relation)):
relation = relation.to_pandas() # type: ignore
except:
pass
Expand All @@ -460,7 +443,8 @@ def predict_links(
else:
# this is temporary, will be removed after gpu feature utils
try:
if isinstance(destination, cudf.DataFrame):
# if isinstance(destination, cudf.DataFrame):
if 'cudf' in str(getmodule(destination)):
destination = destination.to_pandas() # type: ignore
except:
pass
Expand Down Expand Up @@ -540,7 +524,7 @@ def fetch_triplets_for_inference(x_r):


def _score(self, triplets: Union[np.ndarray, TT]) -> TT: # type: ignore
_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
torch = deps.torch
emb = self._kg_embeddings.clone().detach()
if not isinstance(triplets, torch.Tensor):
triplets = torch.tensor(triplets)
Expand Down Expand Up @@ -571,7 +555,13 @@ def __len__(self) -> int:
return self.num_steps

def __getitem__(self, i:int):
_, torch, nn, dgl, GraphDataLoader, _, F, _ = lazy_embed_import_dep()
torch = deps.torch
from torch import nn
from torch.nn import functional as F
dgl = deps.dgl

from dgl.dataloading import GraphDataLoader

eids = torch.from_numpy(np.random.choice(self.eids, self.sample_size))

src, dst = self.g.find_edges(eids)
Expand All @@ -593,7 +583,7 @@ def __getitem__(self, i:int):

@staticmethod
def _sample_neg(triplets:np.ndarray, num_nodes:int) -> Tuple[TT, TT]: # type: ignore
_, torch, _, _, _, _, _, _ = lazy_embed_import_dep()
torch = deps.torch
triplets = torch.tensor(triplets)
h, r, t = triplets.T
h_o_t = torch.randint(high=2, size=h.size())
Expand Down
Loading
Loading