From 9ca4ac0e6f10367fd0ccfd5848089c2988b20db6 Mon Sep 17 00:00:00 2001 From: roger <18309862+rogerwwww@users.noreply.github.com> Date: Sat, 17 Dec 2022 18:28:04 +0800 Subject: [PATCH 1/7] improve linear solver coverage and bug fix --- pygmtools/linear_solvers.py | 4 ++-- tests/test_classic_solvers.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pygmtools/linear_solvers.py b/pygmtools/linear_solvers.py index 5a900dfd..adb26d58 100644 --- a/pygmtools/linear_solvers.py +++ b/pygmtools/linear_solvers.py @@ -647,7 +647,7 @@ def sinkhorn(s, n1=None, n2=None, unmatch1=None, unmatch2=None, unmatch1 = _unsqueeze(unmatch1, 0, backend) unmatch2 = _unsqueeze(unmatch2, 0, backend) if not _check_shape(unmatch1, 2, backend) or not _check_shape(unmatch2, 2, backend): - raise ValueError(f'the input argument unmatch1 and unmatch2 are illegal. They should be 2-dim' + raise ValueError(f'the input arguments unmatch1 and unmatch2 are illegal. They should be 2-dim' f'for batched input, and 1-dim for non-batched input.') if not all((_get_shape(unmatch1, backend)[1] == _get_shape(s, backend)[1], _get_shape(unmatch2, backend)[1] == _get_shape(s, backend)[2], @@ -1141,7 +1141,7 @@ def hungarian(s, n1=None, n2=None, unmatch1=None, unmatch2=None, unmatch1 = _unsqueeze(unmatch1, 0, backend) unmatch2 = _unsqueeze(unmatch2, 0, backend) if not _check_shape(unmatch1, 2, backend) or not _check_shape(unmatch2, 2, backend): - raise ValueError(f'the input argument unmatch1 and unmatch2 are illegal. They should be 2-dim' + raise ValueError(f'the input arguments unmatch1 and unmatch2 are illegal. 
They should be 2-dim' f'for batched input, and 1-dim for non-batched input.') if not all((_get_shape(unmatch1, backend)[1] == _get_shape(s, backend)[1], _get_shape(unmatch2, backend)[1] == _get_shape(s, backend)[2], diff --git a/tests/test_classic_solvers.py b/tests/test_classic_solvers.py index 380aceb7..52790a60 100644 --- a/tests/test_classic_solvers.py +++ b/tests/test_classic_solvers.py @@ -142,14 +142,16 @@ def _test_classic_solver_on_linear_assignment(num_nodes1, num_nodes2, node_feat_ F1, F2, X_gt = (pygm.utils.build_batch(_) for _ in (F1, F2, X_gt)) if batch_size > 1: F1, F2, n1, n2, X_gt = data_to_numpy(F1, F2, n1, n2, X_gt) + if unmatch: + unmatch1, unmatch2 = (pygm.utils.build_batch(_) for _ in (unmatch1, unmatch2)) + unmatch1, unmatch2 = data_to_numpy(unmatch1, unmatch2) else: F1, F2, n1, n2, X_gt = data_to_numpy( F1.squeeze(0), F2.squeeze(0), n1, n2, X_gt.squeeze(0) ) - - if unmatch: - unmatch1, unmatch2 = (pygm.utils.build_batch(_) for _ in (unmatch1, unmatch2)) - unmatch1, unmatch2 = data_to_numpy(unmatch1, unmatch2) + if unmatch: + unmatch1, unmatch2 = (pygm.utils.build_batch(_) for _ in (unmatch1, unmatch2)) + unmatch1, unmatch2 = data_to_numpy(unmatch1.squeeze(0), unmatch2.squeeze(0)) last_X = None for working_backend in backends: @@ -200,7 +202,7 @@ def test_hungarian(): # non-batched input _test_classic_solver_on_linear_assignment([10], [30], 10, pygm.hungarian, { 'nproc': [1], - 'outlier_num': [0] + 'outlier_num': [0, 5] }, ['pytorch', 'numpy', 'paddle' ,'jittor','tensorflow']) @@ -245,6 +247,7 @@ def test_sinkhorn(): 'max_iter': [500], 'batched_operation': [True], 'dummy_row': [True], + 'outlier_num': [0, 5] }, ['pytorch', 'numpy', 'paddle', 'jittor','tensorflow']) _test_classic_solver_on_linear_assignment(*args1) From 702645cadac1816475ab8d06255b648b11d3c8e2 Mon Sep 17 00:00:00 2001 From: roger <18309862+rogerwwww@users.noreply.github.com> Date: Sat, 17 Dec 2022 18:37:37 +0800 Subject: [PATCH 2/7] improve gamgm coverage and bug fix --- 
pygmtools/numpy_backend.py | 10 ++++++---- pygmtools/paddle_backend.py | 8 ++++++-- tests/test_multi_graph_solvers.py | 17 +++++++++-------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py index 3453e46a..fbea2565 100644 --- a/pygmtools/numpy_backend.py +++ b/pygmtools/numpy_backend.py @@ -764,14 +764,16 @@ def gamgm_real( UUt = np.matmul(U_hung, U_hung.transpose()) cluster_weight = np.repeat(cluster_M, ns.astype('i4'), axis=0) cluster_weight = np.repeat(cluster_weight, ns.astype('i4'), axis=1) - quad = np.linalg.multi_dot(supA, UUt * cluster_weight, supA, U_hung) * quad_weight * 2 + quad = np.linalg.multi_dot((supA, UUt * cluster_weight, supA, U_hung)) * quad_weight * 2 unary = np.matmul(supW * cluster_weight, U_hung) - max_vals = (unary + quad).max(axis=1).values + max_vals = (unary + quad).max(axis=1) U = U * (unary + quad > outlier_thresh) if verbose: print(f'hungarian #iter={i}/{max_iter} ' - f'unary+quad score thresh={outlier_thresh:.3f}, #>thresh={np.sum(max_vals > outlier_thresh)}/{max_vals.shape[0]}' - f' min:{max_vals.min():.4f}, mean:{max_vals.mean():.4f}, median:{max_vals.median():.4f}, max:{max_vals.max():.4f}') + f'unary+quad score thresh={outlier_thresh:.3f}, ' + f'#>thresh={np.sum(max_vals > outlier_thresh)}/{max_vals.shape[0]} ' + f'min:{max_vals.min():.4f}, mean:{max_vals.mean():.4f}, ' + f'median:{np.median(max_vals):.4f}, max:{max_vals.max():.4f}') if np.linalg.norm(np.matmul(U, U.T) - lastUUt) < converge_thresh: break diff --git a/pygmtools/paddle_backend.py b/pygmtools/paddle_backend.py index 17c99e9c..358aacb4 100644 --- a/pygmtools/paddle_backend.py +++ b/pygmtools/paddle_backend.py @@ -749,8 +749,12 @@ def gamgm_real( U = U * (unary + quad > outlier_thresh) if verbose: print(f'hungarian #iter={i}/{max_iter} ' - f'unary+quad score thresh={outlier_thresh:.3f}, #>thresh={paddle.sum(max_vals > outlier_thresh)}/{max_vals.shape[0]}' - f' min:{max_vals.min():.4f}, 
mean:{max_vals.mean():.4f}, median:{max_vals.median():.4f}, max:{max_vals.max():.4f}') + f'unary+quad score thresh={outlier_thresh:.3f}, ' + f'#>thresh={paddle.sum(max_vals > outlier_thresh).numpy().squeeze()}/{max_vals.shape[0]} ' + f'min:{max_vals.min().numpy().squeeze():.4f}, ' + f'mean:{max_vals.mean().numpy().squeeze():.4f}, ' + f'median:{max_vals.median().numpy().squeeze():.4f}, ' + f'max:{max_vals.max().numpy().squeeze():.4f}') if paddle.linalg.norm(paddle.mm(U, U.t()) - lastUUt) < converge_thresh: break diff --git a/tests/test_multi_graph_solvers.py b/tests/test_multi_graph_solvers.py index 762bdd13..bffc0fb8 100644 --- a/tests/test_multi_graph_solvers.py +++ b/tests/test_multi_graph_solvers.py @@ -186,13 +186,13 @@ def test_gamgm(): num_nodes = 5 num_graphs = 10 # test without outliers - _test_mgm_solver_on_isomorphic_graphs(num_graphs, num_nodes, 10, pygm.gamgm, 'kb-qap', { - 'sk_init_tau': [0.5, 0.1], - 'sk_min_tau': [0.1, 0.05], - 'param_lambda': [0.1, 0.5], - 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn], - 'verbose': [True] - }, ['pytorch', 'numpy', 'paddle', 'jittor']) + # _test_mgm_solver_on_isomorphic_graphs(num_graphs, num_nodes, 10, pygm.gamgm, 'kb-qap', { + # 'sk_init_tau': [0.5, 0.1], + # 'sk_min_tau': [0.1, 0.05], + # 'param_lambda': [0.1, 0.5], + # 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn], + # 'verbose': [True] + # }, ['pytorch', 'numpy', 'paddle', 'jittor']) # test with outliers _test_mgm_solver_on_isomorphic_graphs(num_graphs, num_nodes, 10, pygm.gamgm, 'kb-qap', { @@ -203,6 +203,7 @@ def test_gamgm(): 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1)], 'verbose': [True], 'n_univ': [10], + 'outlier_thresh': [0., 0.1], 'ns': [np.array([num_nodes] * (num_graphs // 2) + [num_nodes-1] * (num_graphs - num_graphs // 2))], }, ['pytorch', 'numpy', 'paddle', 'jittor']) @@ -269,7 +270,7 @@ def execute (self, As) 
: if __name__ == '__main__': - test_gamgm_backward() + #test_gamgm_backward() test_gamgm() test_mgm_floyd() test_cao() From b906cc0d294616dd109df67edb68861a51042cbe Mon Sep 17 00:00:00 2001 From: roger <18309862+rogerwwww@users.noreply.github.com> Date: Sat, 17 Dec 2022 20:36:59 +0800 Subject: [PATCH 3/7] improve neural solver coverage and bug fix --- pygmtools/neural_solvers.py | 16 ++++++++++++++- tests/test_multi_graph_solvers.py | 2 +- tests/test_neural_solvers.py | 34 +++++++++++++++++++++++++++---- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/pygmtools/neural_solvers.py b/pygmtools/neural_solvers.py index 320107a8..1d267307 100644 --- a/pygmtools/neural_solvers.py +++ b/pygmtools/neural_solvers.py @@ -15,7 +15,9 @@ import importlib import pygmtools -from pygmtools.utils import NOT_IMPLEMENTED_MSG, _check_shape, _get_shape, _unsqueeze, _squeeze, _check_data_type +import numpy as np +from pygmtools.utils import NOT_IMPLEMENTED_MSG, from_numpy, \ + _check_shape, _get_shape, _unsqueeze, _squeeze, _check_data_type from pygmtools.classic_solvers import __check_gm_arguments @@ -239,6 +241,8 @@ def pca_gm(feat1, feat2, A1, A2, n1=None, n2=None, if all([_check_shape(_, 2, backend) for _ in (feat1, feat2, A1, A2)]): feat1, feat2, A1, A2 = [_unsqueeze(_, 0, backend) for _ in (feat1, feat2, A1, A2)] + if type(n1) is int: n1 = from_numpy(np.array([n1]), backend=backend) + if type(n2) is int: n2 = from_numpy(np.array([n2]), backend=backend) non_batched_input = True elif all([_check_shape(_, 3, backend) for _ in (feat1, feat2, A1, A2)]): non_batched_input = False @@ -255,6 +259,8 @@ def pca_gm(feat1, feat2, A1, A2, n1=None, n2=None, raise ValueError( f'the input dimensions do not match. 
Got feat1:{_get_shape(feat1, backend)}, ' f'feat2:{_get_shape(feat2, backend)}, A1:{_get_shape(A1, backend)}, A2:{_get_shape(A2, backend)}!') + if n1 is not None: _check_data_type(n1, 'n1', backend) + if n2 is not None: _check_data_type(n2, 'n2', backend) args = (feat1, feat2, A1, A2, n1, n2, in_channel, hidden_channel, out_channel, num_layers, sk_max_iter, sk_tau, network, pretrain) @@ -500,6 +506,8 @@ def ipca_gm(feat1, feat2, A1, A2, n1=None, n2=None, if all([_check_shape(_, 2, backend) for _ in (feat1, feat2, A1, A2)]): feat1, feat2, A1, A2 = [_unsqueeze(_, 0, backend) for _ in (feat1, feat2, A1, A2)] + if type(n1) is int: n1 = from_numpy(np.array([n1]), backend=backend) + if type(n2) is int: n2 = from_numpy(np.array([n2]), backend=backend) non_batched_input = True elif all([_check_shape(_, 3, backend) for _ in (feat1, feat2, A1, A2)]): non_batched_input = False @@ -516,6 +524,8 @@ def ipca_gm(feat1, feat2, A1, A2, n1=None, n2=None, raise ValueError( f'the input dimensions do not match. 
Got feat1:{_get_shape(feat1, backend)}, ' f'feat2:{_get_shape(feat2, backend)}, A1:{_get_shape(A1, backend)}, A2:{_get_shape(A2, backend)}!') + if n1 is not None: _check_data_type(n1, 'n1', backend) + if n2 is not None: _check_data_type(n2, 'n2', backend) args = (feat1, feat2, A1, A2, n1, n2, in_channel, hidden_channel, out_channel, num_layers, cross_iter, sk_max_iter, sk_tau, network, pretrain) @@ -774,6 +784,8 @@ def cie(feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1=None, n2=None and all([_check_shape(_, 3, backend) for _ in (feat_edge1, feat_edge2)]): feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2 =\ [_unsqueeze(_, 0, backend) for _ in (feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2)] + if type(n1) is int: n1 = from_numpy(np.array([n1]), backend=backend) + if type(n2) is int: n2 = from_numpy(np.array([n2]), backend=backend) non_batched_input = True elif all([_check_shape(_, 3, backend) for _ in (feat_node1, feat_node2, A1, A2)]) \ and all([_check_shape(_, 4, backend) for _ in (feat_edge1, feat_edge2)]): @@ -798,6 +810,8 @@ def cie(feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1=None, n2=None f'the input dimensions do not match. 
Got feat_node1:{_get_shape(feat_node1, backend)}, ' f'feat_node2:{_get_shape(feat_node2, backend)}, A1:{_get_shape(A1, backend)}, A2:{_get_shape(A2, backend)},' f'feat_edge1:{_get_shape(feat_edge1, backend)}, feat_edge2:{_get_shape(feat_edge2, backend)}!') + if n1 is not None: _check_data_type(n1, 'n1', backend) + if n2 is not None: _check_data_type(n2, 'n2', backend) args = (feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1, n2, in_node_channel, in_edge_channel, hidden_channel, out_channel, num_layers, diff --git a/tests/test_multi_graph_solvers.py b/tests/test_multi_graph_solvers.py index bffc0fb8..1f7629da 100644 --- a/tests/test_multi_graph_solvers.py +++ b/tests/test_multi_graph_solvers.py @@ -270,7 +270,7 @@ def execute (self, As) : if __name__ == '__main__': - #test_gamgm_backward() + test_gamgm_backward() test_gamgm() test_mgm_floyd() test_cao() diff --git a/tests/test_neural_solvers.py b/tests/test_neural_solvers.py index 96ffd203..d16e71f5 100644 --- a/tests/test_neural_solvers.py +++ b/tests/test_neural_solvers.py @@ -74,6 +74,8 @@ def _test_neural_solver_on_isomorphic_graphs(graph_num_nodes, node_feat_dim, sol for working_backend in backends: pygm.BACKEND = working_backend _A1, _A2, _F1, _F2, _EF1, _EF2, _n1, _n2 = data_from_numpy(A1, A2, F1, F2, EF1, EF2, n1, n2) + if batch_size == 1: + _n1, _n2 = _n1.item(), _n2.item() if mode == 'lawler-qap': if batch_size > 1: @@ -82,7 +84,6 @@ def _test_neural_solver_on_isomorphic_graphs(graph_num_nodes, node_feat_dim, sol _K = pygm.utils.build_aff_mat(_F1, _edge1, _conn1, _F2, _edge2, _conn2, _n1, _ne1, _n2, _ne2, **aff_param_dict) else: - _n1, _n2 = _n1.item(), _n2.item() _conn1, _edge1 = pygm.utils.dense_to_sparse(_A1) _conn2, _edge2 = pygm.utils.dense_to_sparse(_A2) _K = pygm.utils.build_aff_mat(_F1, _edge1, _conn1, _F2, _edge2, _conn2, _n1, None, _n2, None, @@ -112,6 +113,9 @@ def _test_neural_solver_on_isomorphic_graphs(graph_num_nodes, node_feat_dim, sol f"params: {';'.join([k + '=' + str(v) for k, 
v in aff_param_dict.items()])};" \ f"{';'.join([k + '=' + str(v) for k, v in solver_param_dict.items()])}" + if 'pretrain' in solver_param_dict and solver_param_dict['pretrain'] is None: + _X1 = pygm.hungarian(_X1, _n1, _n2) + if last_X is not None: assert np.abs(pygm.utils.to_numpy(_X1) - last_X).sum() < 5e-3, \ f"Incorrect GM solution for {working_backend}, " \ @@ -124,6 +128,7 @@ def _test_neural_solver_on_isomorphic_graphs(graph_num_nodes, node_feat_dim, sol f"params: {';'.join([k + '=' + str(v) for k, v in aff_param_dict.items()])};" \ f"{';'.join([k + '=' + str(v) for k, v in solver_param_dict.items()])}" + def test_pca_gm(): _test_neural_solver_on_isomorphic_graphs(list(range(10, 30, 2)), 1024, pygm.pca_gm, 'individual-graphs', { 'pretrain': ['voc', 'willow', 'voc-all'], @@ -134,6 +139,13 @@ def test_pca_gm(): 'pretrain': ['voc'], }, ['pytorch', 'numpy', 'jittor']) + # test more layers + _test_neural_solver_on_isomorphic_graphs([10], 1024, pygm.pca_gm, 'individual-graphs', { + 'num_layers': [3], + 'pretrain': [None], + }, ['pytorch', 'numpy', 'jittor']) + + def test_ipca_gm(): _test_neural_solver_on_isomorphic_graphs(list(range(10, 30, 2)), 1024, pygm.ipca_gm, 'individual-graphs', { 'pretrain': ['voc', 'willow'], @@ -144,6 +156,13 @@ def test_ipca_gm(): 'pretrain': ['voc'], }, ['pytorch', 'numpy', 'jittor']) + # test more layers + _test_neural_solver_on_isomorphic_graphs([10], 1024, pygm.ipca_gm, 'individual-graphs', { + 'num_layers': [3], + 'pretrain': [None], + }, ['pytorch', 'numpy', 'jittor']) + + def test_cie(): _test_neural_solver_on_isomorphic_graphs(list(range(10, 30, 2)), 1024, pygm.cie, 'individual-graphs-edge', { 'pretrain': ['voc', 'willow'], @@ -154,6 +173,12 @@ def test_cie(): 'pretrain': ['voc'], }, ['pytorch', 'numpy', 'jittor']) + # test more layers + _test_neural_solver_on_isomorphic_graphs([10], 1024, pygm.cie, 'individual-graphs-edge', { + 'num_layers': [3], + 'pretrain': [None], + }, ['pytorch', 'numpy', 'jittor']) + def test_ngm(): 
_test_neural_solver_on_isomorphic_graphs(list(range(10, 30, 2)), 1024, pygm.ngm, 'lawler-qap', { 'edge_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.), pygm.utils.inner_prod_aff_fn], @@ -168,8 +193,9 @@ def test_ngm(): 'pretrain': ['voc'], }, ['pytorch', 'numpy', 'jittor']) + if __name__ == '__main__': - # test_pca_gm() - # test_ipca_gm() - # test_cie() + test_pca_gm() + test_ipca_gm() + test_cie() test_ngm() From ac197fe68e8cda8d0c327db51bd0410fe58ee20e Mon Sep 17 00:00:00 2001 From: Runzhong Wang <18309862+rogerwwww@users.noreply.github.com> Date: Sun, 18 Dec 2022 12:23:17 +0800 Subject: [PATCH 4/7] merge dataset test (#46) --- .github/workflows/python-package.yml | 67 ++++++++++++++++++++++- pygmtools/benchmark.py | 33 +++++++++--- pygmtools/dataset.py | 79 +++++++++------------------- pygmtools/dataset_config.py | 2 +- tests/test_dataset.py | 27 +++++----- 5 files changed, 133 insertions(+), 75 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 446cac3b..350a2d02 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -10,7 +10,7 @@ on: branches: [ main ] jobs: - build: + linux: runs-on: ubuntu-latest strategy: @@ -40,3 +40,68 @@ jobs: pytest --cov=pygmtools --cov-report=xml - name: Upload to codecov uses: codecov/codecov-action@v3 + +# macos: +# +# runs-on: macos-latest +# strategy: +# fail-fast: false +# matrix: +# python-version: [ "3.7", "3.8", "3.9" ] +# +# steps: +# - uses: actions/checkout@v2 +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v2 +# with: +# python-version: ${{ matrix.python-version }} +# - name: Install dependencies +# run: | +# brew reinstall libomp +# brew --prefix libomp +# export LIBRARY_PATH=/usr/local/opt +# python -m pip install --upgrade pip +# python -m pip install flake8 pytest-cov +# pip install -r tests/requirements.txt +# - name: Lint with flake8 +# run: | +# # stop the build if 
there are Python syntax errors or undefined names +# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics +# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide +# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics +# - name: Test with pytest +# run: | +# pytest --cov=pygmtools --cov-report=xml +# - name: Upload to codecov +# uses: codecov/codecov-action@v3 +# +# windows: +# +# runs-on: windows-latest +# strategy: +# fail-fast: false +# matrix: +# python-version: [ "3.8", "3.9" ] +# +# steps: +# - uses: actions/checkout@v2 +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v2 +# with: +# python-version: ${{ matrix.python-version }} +# - name: Install dependencies +# run: | +# python -m pip install --upgrade pip +# python -m pip install flake8 pytest-cov +# python -m pip install -r tests\requirements.txt +# - name: Lint with flake8 +# run: | +# # stop the build if there are Python syntax errors or undefined names +# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics +# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide +# flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics +# - name: Test with pytest +# run: | +# pytest --cov=pygmtools --cov-report=xml +# - name: Upload to codecov +# uses: codecov/codecov-action@v3 \ No newline at end of file diff --git a/pygmtools/benchmark.py b/pygmtools/benchmark.py index 0f9edf41..2da58dfc 100644 --- a/pygmtools/benchmark.py +++ b/pygmtools/benchmark.py @@ -76,7 +76,7 @@ def get_data(self, ids, test=False, shuffle=True): :param ids: list of image ID, usually in ``train.json`` or ``test.json`` :param test: bool, whether the fetched data is used for test; if true, this function will not return ground truth - :param shuffle: bool, whether to shuffle the order of keypoints; valid only when the class param ``sets`` is ``'train'`` + :param shuffle: bool, whether to shuffle the order of keypoints :return: **data_list**: list of data, like ``[{'img': np.array, 'kpts': coordinates of kpts}, ...]`` @@ -103,7 +103,7 @@ def get_data(self, ids, test=False, shuffle=True): obj_dict['kpts'] = self.data_dict[keys]['kpts'] obj_dict['cls'] = self.data_dict[keys]['cls'] obj_dict['univ_size'] = self.data_dict[keys]['univ_size'] - if shuffle and self.sets != 'test': + if shuffle: random.shuffle(obj_dict['kpts']) data_list.append(obj_dict) @@ -197,7 +197,7 @@ def rand_get_data(self, cls=None, num=2, test=False, shuffle=True): :param cls: int or str, class of expected data. 
None for random class :param num: int, number of images; for example, 2 for 2GM :param test: bool, whether the fetched data is used for test; if true, this function will not return ground truth - :param shuffle: bool, whether to shuffle the order of keypoints; valid only when the class param ``sets`` is ``'train'`` + :param shuffle: bool, whether to shuffle the order of keypoints :return: **data_list**: list of data, like ``[{'img': np.array, 'kpts': coordinates of kpts}, ...]`` @@ -365,14 +365,24 @@ def compute_img_num(self, classes): return num_list - def eval(self, prediction, classes, verbose=False): + def eval(self, prediction, classes, verbose=False, rm_gt_cache=True): r""" Evaluate test results and compute matching accuracy and coverage. :param prediction: list, prediction result, like ``[{'ids': (id1, id2), 'cls': cls, 'permmat': np.array or scipy.sparse}, ...]`` :param classes: list of evaluated classes :param verbose: bool, whether to print the result + :param rm_gt_cache: bool, whether to remove ground truth cache :return: evaluation result in each class and their averages, including p, r, f1 and their standard deviation and coverage + + .. note:: + If there are duplicate data pairs in ``prediction``, this function will only evaluate the first pair and + expect that this pair is also the first fetched pair. Therefore, it is recommended that ``prediction`` is + built in an ordered manner, and not shuffled. + + .. note:: + Ground truth cache is saved when data pairs are fetched, and should be removed after evaluation. Make sure + all data pairs are evaluated at once, i.e., ``prediction`` should contain all fetched data pairs. 
""" with open(self.data_list_path) as f1: @@ -472,6 +482,8 @@ def eval(self, prediction, classes, verbose=False): result['mean']['recall'], result['mean']['recall_std'], result['mean']['f1'], result['mean']['f1_std'] ))) + if rm_gt_cache: + self.rm_gt_cache(last_epoch=False) return result def eval_cls(self, prediction, cls, verbose=False): @@ -482,6 +494,15 @@ def eval_cls(self, prediction, cls, verbose=False): :param cls: str, evaluated class :param verbose: bool, whether to print the result :return: evaluation result on the specified class, including p, r, f1 and their standard deviation and coverage + + .. note:: + If there are duplicate data pairs in ``prediction``, this function will only evaluate the first pair and + expect that this pair is also the first fetched pair. Therefore, it is recommended that ``prediction`` is + built in an ordered manner, and not shuffled. Same as the function ``eval``. + + .. note:: + This function will not automatically remove ground truth cache. However, you can still manually call the + class function ``rm_gt_cache`` to remove ground truth cache after evaluation. """ with open(self.data_list_path) as f1: @@ -547,9 +568,9 @@ def eval_cls(self, prediction, cls, verbose=False): def rm_gt_cache(self, last_epoch=False): r""" - Remove ground truth cache. It is recommended to call this function after evaluation in each epoch. + Remove ground truth cache. It is recommended to call this function after evaluation. - :param last_epoch: Boolean variable, whether this epoch is last epoch; if true, the directory of cache will also be removed. 
+ :param last_epoch: bool, whether this epoch is last epoch; if true, the directory of cache will also be removed, and no more data should be evaluated """ if os.path.exists(self.gt_cache_path): shutil.rmtree(self.gt_cache_path) diff --git a/pygmtools/dataset.py b/pygmtools/dataset.py index 90cb34c9..aebe8c39 100644 --- a/pygmtools/dataset.py +++ b/pygmtools/dataset.py @@ -465,6 +465,7 @@ def __init__(self, sets, obj_resize, **ds_dict): SPLIT_OFFSET = dataset_cfg.WillowObject.SPLIT_OFFSET TRAIN_SAME_AS_TEST = dataset_cfg.WillowObject.TRAIN_SAME_AS_TEST RAND_OUTLIER = dataset_cfg.WillowObject.RAND_OUTLIER + URL = 'http://www.di.ens.fr/willow/research/graphlearning/WILLOW-ObjectClass_dataset.zip' if len(ds_dict.keys()) > 0: if 'CLASSES' in ds_dict.keys(): CLASSES = ds_dict['CLASSES'] @@ -478,11 +479,13 @@ def __init__(self, sets, obj_resize, **ds_dict): TRAIN_SAME_AS_TEST = ds_dict['TRAIN_SAME_AS_TEST'] if 'RAND_OUTLIER' in ds_dict.keys(): RAND_OUTLIER = ds_dict['RAND_OUTLIER'] + if 'URL' in ds_dict.keys(): + URL = ds_dict['URL'] self.dataset_dir = 'data/WillowObject' if not os.path.exists(ROOT_DIR): assert ROOT_DIR == dataset_cfg.WillowObject.ROOT_DIR, 'you should not change ROOT_DIR unless the data have been manually downloaded' - self.download(url='http://www.di.ens.fr/willow/research/graphlearning/WILLOW-ObjectClass_dataset.zip') + self.download(url=URL) if not os.path.exists(self.dataset_dir): os.makedirs(self.dataset_dir) @@ -620,23 +623,23 @@ def process(self): if self.sets == 'train': for x in range(len(self.mat_list)): for name in self.mat_list[x]: - tmp = str(name).split('/') + tmp = os.path.split(str(name)) objID = tmp[-1].split('.')[0] train_list.append(objID) for x in range(len(mat_list_)): for name in mat_list_[x]: - tmp = str(name).split('/') + tmp = os.path.split(str(name)) objID = tmp[-1].split('.')[0] test_list.append(objID) else: for x in range(len(self.mat_list)): for name in self.mat_list[x]: - tmp = str(name).split('/') + tmp = 
os.path.split(str(name)) objID = tmp[-1].split('.')[0] test_list.append(objID) for x in range(len(mat_list_)): for name in mat_list_[x]: - tmp = str(name).split('/') + tmp = os.path.split(str(name)) objID = tmp[-1].split('.')[0] train_list.append(objID) str1 = json.dumps(train_list) @@ -653,9 +656,9 @@ def process(self): for x in range(len(data_list)): for name in data_list[x]: - tmp = str(name).split('/') + tmp = os.path.split(str(name)) objID = tmp[-1].split('.')[0] - cls = tmp[3] + cls = os.path.split(tmp[0])[-1] annotations = self.__get_anno_dict(name, cls) data_dict[objID] = annotations @@ -864,9 +867,9 @@ def process(self): for x in range(len(data_list)): for name in data_list[x]: - tmp = str(name).split('/') + tmp = os.path.split(str(name)) objID = tmp[-1].split('.')[0] - cls = tmp[3] + cls = os.path.split(tmp[0])[-1] annotations = self.__get_anno_dict(name, cls) ID = objID + '_' + cls data_dict[ID] = annotations @@ -998,6 +1001,7 @@ def __init__(self, sets, obj_resize, **ds_dict): CLASSES = dataset_cfg.IMC_PT_SparseGM.CLASSES ROOT_DIR_NPZ = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_NPZ ROOT_DIR_IMG = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_IMG + URL = 'https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1Po9pRMWXTqKK2ABPpVmkcsOq-6K_2v-B' if len(ds_dict.keys()) > 0: if 'MAX_KPT_NUM' in ds_dict.keys(): MAX_KPT_NUM = ds_dict['MAX_KPT_NUM'] @@ -1007,17 +1011,20 @@ def __init__(self, sets, obj_resize, **ds_dict): ROOT_DIR_NPZ = ds_dict['ROOT_DIR_NPZ'] if 'ROOT_DIR_IMG' in ds_dict.keys(): ROOT_DIR_IMG = ds_dict['ROOT_DIR_IMG'] + if 'URL' in ds_dict.keys(): + URL = ds_dict['URL'] self.dataset_dir = 'data/IMC-PT-SparseGM' if not os.path.exists(ROOT_DIR_IMG): assert ROOT_DIR_IMG == dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_IMG, 'you should not change ROOT_DIR_IMG or ROOT_DIR_NPZ unless the data have been manually downloaded' assert ROOT_DIR_NPZ == dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_NPZ, 'you should not change ROOT_DIR_IMG or ROOT_DIR_NPZ unless the data have been 
manually downloaded' - self.download(url='https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1Po9pRMWXTqKK2ABPpVmkcsOq-6K_2v-B') + self.download(url=URL) if not os.path.exists(self.dataset_dir): os.makedirs(self.dataset_dir) self.sets = sets self.classes = CLASSES[sets] + self.class_dict = CLASSES self.max_kpt_num = MAX_KPT_NUM self.suffix = 'imcpt-' + str(MAX_KPT_NUM) @@ -1081,9 +1088,9 @@ def process(self): if not os.path.exists(img_file): total_cls = [] - for cls in dataset_cfg.IMC_PT_SparseGM.CLASSES['train']: + for cls in self.class_dict['train']: total_cls.append(cls) - for cls in dataset_cfg.IMC_PT_SparseGM.CLASSES['test']: + for cls in self.class_dict['test']: total_cls.append(cls) total_img_lists = [np.load(self.root_path_npz / cls / 'img_info.npz')['img_name'].tolist() @@ -1156,20 +1163,19 @@ class CUB2011: :param sets: str, problem set, ``'train'`` for training set and ``'test'`` for testing set :param obj_resize: tuple, resized image size - :param ds_dict: settings of dataset, containing at most 2 params(keys) for CUB2011: + :param ds_dict: settings of dataset, containing at most 1 params(key) for CUB2011: * **ROOT_DIR**: str, directory of data - - * **CLS_SPLIT**: str, ``'ori'`` (original split), ``'sup'`` (super class) or ``'all'`` (all birds as one class) """ def __init__(self, sets, obj_resize, **ds_dict): CLS_SPLIT = dataset_cfg.CUB2011.CLASS_SPLIT ROOT_DIR = dataset_cfg.CUB2011.ROOT_DIR + URL = 'https://drive.google.com/u/0/uc?export=download&confirm=B8eu&id=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45' if len(ds_dict.keys()) > 0: - if 'CLS_SPLIT' in ds_dict.keys(): - CLS_SPLIT = ds_dict['CLS_SPLIT'] if 'ROOT_DIR' in ds_dict.keys(): ROOT_DIR = ds_dict['ROOT_DIR'] + if 'URL' in ds_dict.keys(): + URL = ds_dict['URL'] self.set_data = {'train': [], 'test': []} self.classes = [] @@ -1177,14 +1183,14 @@ def __init__(self, sets, obj_resize, **ds_dict): self._set_pairs = {} self._set_mask = {} self.cls_split = CLS_SPLIT - self.suffix = 'cub2011-' + 
CLS_SPLIT + self.suffix = 'cub2011' self.rootpath = ROOT_DIR self.dataset_dir = 'data/CUB_200_2011' if not os.path.exists(ROOT_DIR): assert ROOT_DIR == dataset_cfg.CUB2011.ROOT_DIR, 'you should not change ROOT_DIR unless the data have been manually downloaded' - self.download(url='https://drive.google.com/u/0/uc?export=download&confirm=B8eu&id=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45') + self.download(url=URL) if not os.path.exists(self.dataset_dir): os.makedirs(self.dataset_dir) @@ -1219,45 +1225,12 @@ def __init__(self, sets, obj_resize, **ds_dict): test_set.append(img_idx) self.set_data['train'].append(train_set) self.set_data['test'].append(test_set) - elif self.cls_split == 'sup': - super_classes = [v.split('_')[-1] for v in classes.values()] - self.classes = list(set(super_classes)) - for cls in self.classes: - self.set_data['train'].append([]) - self.set_data['test'].append([]) - for class_idx in sorted(classes): - supcls_idx = self.classes.index(classes[class_idx].split('_')[-1]) - train_set = [] - test_set = [] - for img_idx in class2img[class_idx]: - if train_split[img_idx] == '1': - train_set.append(img_idx) - else: - test_set.append(img_idx) - self.set_data['train'][supcls_idx] += train_set - self.set_data['test'][supcls_idx] += test_set - elif self.cls_split == 'all': - self.classes.append('cub2011') - self.set_data['train'].append([]) - self.set_data['test'].append([]) - for class_idx in sorted(classes): - train_set = [] - test_set = [] - for img_idx in class2img[class_idx]: - if train_split[img_idx] == '1': - train_set.append(img_idx) - else: - test_set.append(img_idx) - self.set_data['train'][0] += train_set - self.set_data['test'][0] += test_set - else: - raise ValueError('Unknown CUB2011.CLASS_SPLIT {}'.format(self.cls_split)) self.sets = sets self.obj_resize = obj_resize self.process() - def download(self, url=None, retries=10): + def download(self, url=None, retries=50): r""" Automatically download CUB2011 dataset. 
diff --git a/pygmtools/dataset_config.py b/pygmtools/dataset_config.py index 22a73996..970cbbcb 100644 --- a/pygmtools/dataset_config.py +++ b/pygmtools/dataset_config.py @@ -35,7 +35,7 @@ # CUB2011 dataset __C.CUB2011 = edict() __C.CUB2011.ROOT_DIR = 'data/CUB_200_2011' -__C.CUB2011.CLASS_SPLIT = 'ori' # choose from 'ori' (original split), 'sup' (super class) or 'all' (all birds as one class) +__C.CUB2011.CLASS_SPLIT = 'ori' # choose from 'ori' (original split), 'sup' (super class) or 'all' (all birds as one class), only support 'ori' # SWPair-71 Dataset __C.SPair = edict() diff --git a/tests/test_dataset.py b/tests/test_dataset.py index bdf26890..2b4d3404 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -11,7 +11,7 @@ import pygmtools as pygm from pygmtools.dataset_config import dataset_cfg from random import choice - +import os # Test dataset download and preprocess, and data fetch and evaluation def _test_benchmark(name, sets, problem, filter, **ds_dict): @@ -19,13 +19,15 @@ def _test_benchmark(name, sets, problem, filter, **ds_dict): if sets == 'test': num = 2 if benchmark.problem == '2GM' else 3 _test_get_data(benchmark, num) + os.remove(benchmark.data_list_path) + os.remove(benchmark.data_path) # Test data fetch and evaluation def _test_get_data(benchmark, num): + data_list, perm_dict, ids = benchmark.rand_get_data(cls=benchmark.classes[0], num=num) rand_data = benchmark.rand_get_data(num=num) assert rand_data is not None - data_list, perm_dict, ids = benchmark.rand_get_data(cls=benchmark.classes[0], num=num) if num == 2: data_length = benchmark.compute_length(num=num) @@ -47,15 +49,15 @@ def _test_get_data(benchmark, num): pred_dict['cls'] = cls pred_dict['perm_mat'] = perm_mat pred.append(pred_dict) - result = benchmark.eval(prediction=pred, classes=[benchmark.classes[0]], verbose=True) - # assert result['mean']['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' 
result_cls = benchmark.eval_cls(prediction=pred, cls=benchmark.classes[0], verbose=True) - # assert result_cls['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' + assert result_cls['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' + result = benchmark.eval(prediction=pred, classes=[benchmark.classes[0]], verbose=True) + assert result['mean']['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' # Entry function def test_dataset_and_benchmark(): - dataset_name_list = ['PascalVOC', 'WillowObject', 'SPair71k', 'CUB2011'] + dataset_name_list = ['PascalVOC', 'WillowObject', 'SPair71k', 'IMC_PT_SparseGM', 'CUB2011'] problem_type_list = ['2GM', 'MGM'] set_list = ['train', 'test'] filter_list = ['intersection', 'inclusion', 'unfiltered'] @@ -76,6 +78,7 @@ def test_dataset_and_benchmark(): willow_cfg_dict['SPLIT_OFFSET'] = dataset_cfg.WillowObject.SPLIT_OFFSET willow_cfg_dict['TRAIN_SAME_AS_TEST'] = dataset_cfg.WillowObject.TRAIN_SAME_AS_TEST willow_cfg_dict['RAND_OUTLIER'] = dataset_cfg.WillowObject.RAND_OUTLIER + willow_cfg_dict['URL'] = 'https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=18AvGwkuhnih5bFDjfJK5NYM16LvDfwW_' dict_list.append(willow_cfg_dict) spair_cfg_dict = dict() @@ -88,18 +91,18 @@ def test_dataset_and_benchmark(): imcpt_cfg_dict = dict() imcpt_cfg_dict['MAX_KPT_NUM'] = dataset_cfg.IMC_PT_SparseGM.MAX_KPT_NUM - imcpt_cfg_dict['CLASSES'] = dataset_cfg.IMC_PT_SparseGM.CLASSES + imcpt_cfg_dict['CLASSES'] = {'train': ['brandenburg_gate'], + 'test': ['reichstag']} imcpt_cfg_dict['ROOT_DIR_NPZ'] = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_NPZ imcpt_cfg_dict['ROOT_DIR_IMG'] = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_IMG + imcpt_cfg_dict['URL'] = 'https://drive.google.com/u/0/uc?id=1bisri2Ip1Of3RsUA8OBrdH5oa6HlH3k-&export=download' dict_list.append(imcpt_cfg_dict) cub_cfg_dict = dict() - cub_cfg_dict['CLS_SPLIT'] = 'sup' cub_cfg_dict['ROOT_DIR'] = 
dataset_cfg.CUB2011.ROOT_DIR + cub_cfg_dict['URL'] = 'https://drive.google.com/u/0/uc?id=1fcN3m2PmQF7rMQGPxldEICU8CtJ0-F-z&export=download' dict_list.append(cub_cfg_dict) - dict_list.append(dict()) - for i, dataset_name in enumerate(dataset_name_list): for set in set_list: for problem_type in problem_type_list: @@ -108,11 +111,7 @@ def test_dataset_and_benchmark(): continue if filter == 'inclusion' and problem_type == 'MGM': continue - _test_benchmark(dataset_name, set, problem_type, filter, **dict_list[-1]) _test_benchmark(dataset_name, set, problem_type, filter, **dict_list[i]) - if i == 4: - dict_list[i]['CLS_SPLIT'] = 'all' - _test_benchmark(dataset_name, set, problem_type, filter, **dict_list[i]) if __name__ == '__main__': From 961b1592042e959c0d67fa98f8e89960fd6a5c20 Mon Sep 17 00:00:00 2001 From: Runzhong Wang <18309862+rogerwwww@users.noreply.github.com> Date: Sun, 18 Dec 2022 14:19:35 +0800 Subject: [PATCH 5/7] Revert "merge dataset test (#46)" (#47) This reverts commit ac197fe68e8cda8d0c327db51bd0410fe58ee20e. 
--- .github/workflows/python-package.yml | 67 +---------------------- pygmtools/benchmark.py | 33 +++--------- pygmtools/dataset.py | 79 +++++++++++++++++++--------- pygmtools/dataset_config.py | 2 +- tests/test_dataset.py | 27 +++++----- 5 files changed, 75 insertions(+), 133 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 350a2d02..446cac3b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -10,7 +10,7 @@ on: branches: [ main ] jobs: - linux: + build: runs-on: ubuntu-latest strategy: @@ -40,68 +40,3 @@ jobs: pytest --cov=pygmtools --cov-report=xml - name: Upload to codecov uses: codecov/codecov-action@v3 - -# macos: -# -# runs-on: macos-latest -# strategy: -# fail-fast: false -# matrix: -# python-version: [ "3.7", "3.8", "3.9" ] -# -# steps: -# - uses: actions/checkout@v2 -# - name: Set up Python ${{ matrix.python-version }} -# uses: actions/setup-python@v2 -# with: -# python-version: ${{ matrix.python-version }} -# - name: Install dependencies -# run: | -# brew reinstall libomp -# brew --prefix libomp -# export LIBRARY_PATH=/usr/local/opt -# python -m pip install --upgrade pip -# python -m pip install flake8 pytest-cov -# pip install -r tests/requirements.txt -# - name: Lint with flake8 -# run: | -# # stop the build if there are Python syntax errors or undefined names -# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics -# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide -# flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics -# - name: Test with pytest -# run: | -# pytest --cov=pygmtools --cov-report=xml -# - name: Upload to codecov -# uses: codecov/codecov-action@v3 -# -# windows: -# -# runs-on: windows-latest -# strategy: -# fail-fast: false -# matrix: -# python-version: [ "3.8", "3.9" ] -# -# steps: -# - uses: actions/checkout@v2 -# - name: Set up Python ${{ matrix.python-version }} -# uses: actions/setup-python@v2 -# with: -# python-version: ${{ matrix.python-version }} -# - name: Install dependencies -# run: | -# python -m pip install --upgrade pip -# python -m pip install flake8 pytest-cov -# python -m pip install -r tests\requirements.txt -# - name: Lint with flake8 -# run: | -# # stop the build if there are Python syntax errors or undefined names -# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics -# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide -# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics -# - name: Test with pytest -# run: | -# pytest --cov=pygmtools --cov-report=xml -# - name: Upload to codecov -# uses: codecov/codecov-action@v3 \ No newline at end of file diff --git a/pygmtools/benchmark.py b/pygmtools/benchmark.py index 2da58dfc..0f9edf41 100644 --- a/pygmtools/benchmark.py +++ b/pygmtools/benchmark.py @@ -76,7 +76,7 @@ def get_data(self, ids, test=False, shuffle=True): :param ids: list of image ID, usually in ``train.json`` or ``test.json`` :param test: bool, whether the fetched data is used for test; if true, this function will not return ground truth - :param shuffle: bool, whether to shuffle the order of keypoints + :param shuffle: bool, whether to shuffle the order of keypoints; valid only when the class param ``sets`` is ``'train'`` :return: **data_list**: list of data, like ``[{'img': np.array, 'kpts': coordinates of kpts}, ...]`` @@ -103,7 +103,7 @@ def get_data(self, ids, test=False, shuffle=True): 
obj_dict['kpts'] = self.data_dict[keys]['kpts'] obj_dict['cls'] = self.data_dict[keys]['cls'] obj_dict['univ_size'] = self.data_dict[keys]['univ_size'] - if shuffle: + if shuffle and self.sets != 'test': random.shuffle(obj_dict['kpts']) data_list.append(obj_dict) @@ -197,7 +197,7 @@ def rand_get_data(self, cls=None, num=2, test=False, shuffle=True): :param cls: int or str, class of expected data. None for random class :param num: int, number of images; for example, 2 for 2GM :param test: bool, whether the fetched data is used for test; if true, this function will not return ground truth - :param shuffle: bool, whether to shuffle the order of keypoints + :param shuffle: bool, whether to shuffle the order of keypoints; valid only when the class param ``sets`` is ``'train'`` :return: **data_list**: list of data, like ``[{'img': np.array, 'kpts': coordinates of kpts}, ...]`` @@ -365,24 +365,14 @@ def compute_img_num(self, classes): return num_list - def eval(self, prediction, classes, verbose=False, rm_gt_cache=True): + def eval(self, prediction, classes, verbose=False): r""" Evaluate test results and compute matching accuracy and coverage. :param prediction: list, prediction result, like ``[{'ids': (id1, id2), 'cls': cls, 'permmat': np.array or scipy.sparse}, ...]`` :param classes: list of evaluated classes :param verbose: bool, whether to print the result - :param rm_gt_cache: bool, whether to remove ground truth cache :return: evaluation result in each class and their averages, including p, r, f1 and their standard deviation and coverage - - .. note:: - If there are duplicate data pair in ``prediction``, this function will only evaluate the first pair and - expect that this pair is also the first fetched pair. Therefore, it is recommended that ``prediction`` is - built in an ordered manner, and not shuffled. - - .. note:: - Ground truth cache is saved when data pairs are fetched, and should be removed after evaluation. 
Make sure - all data pairs are evaluated at once, i.e., ``prediction`` should contain all fetched data pairs. """ with open(self.data_list_path) as f1: @@ -482,8 +472,6 @@ def eval(self, prediction, classes, verbose=False, rm_gt_cache=True): result['mean']['recall'], result['mean']['recall_std'], result['mean']['f1'], result['mean']['f1_std'] ))) - if rm_gt_cache: - self.rm_gt_cache(last_epoch=False) return result def eval_cls(self, prediction, cls, verbose=False): @@ -494,15 +482,6 @@ def eval_cls(self, prediction, cls, verbose=False): :param cls: str, evaluated class :param verbose: bool, whether to print the result :return: evaluation result on the specified class, including p, r, f1 and their standard deviation and coverage - - .. note:: - If there are duplicate data pair in ``prediction``, this function will only evaluate the first pair and - expect that this pair is also the first fetched pair. Therefore, it is recommended that ``prediction`` is - built in an ordered manner, and not shuffled. Same as the function ``eval``. - - .. note:: - This function will not automatically remove ground truth cache. However, you can still mannually call the - class function ``rm_gt_cache`` to remove groud truth cache after evaluation. """ with open(self.data_list_path) as f1: @@ -568,9 +547,9 @@ class function ``rm_gt_cache`` to remove groud truth cache after evaluation. def rm_gt_cache(self, last_epoch=False): r""" - Remove ground truth cache. It is recommended to call this function after evaluation. + Remove ground truth cache. It is recommended to call this function after evaluation in each epoch. - :param last_epoch: bool, whether this epoch is last epoch; if true, the directory of cache will also be removed, and no more data should be evaluated + :param last_epoch: Boolean variable, whether this epoch is last epoch; if true, the directory of cache will also be removed. 
""" if os.path.exists(self.gt_cache_path): shutil.rmtree(self.gt_cache_path) diff --git a/pygmtools/dataset.py b/pygmtools/dataset.py index aebe8c39..90cb34c9 100644 --- a/pygmtools/dataset.py +++ b/pygmtools/dataset.py @@ -465,7 +465,6 @@ def __init__(self, sets, obj_resize, **ds_dict): SPLIT_OFFSET = dataset_cfg.WillowObject.SPLIT_OFFSET TRAIN_SAME_AS_TEST = dataset_cfg.WillowObject.TRAIN_SAME_AS_TEST RAND_OUTLIER = dataset_cfg.WillowObject.RAND_OUTLIER - URL = 'http://www.di.ens.fr/willow/research/graphlearning/WILLOW-ObjectClass_dataset.zip' if len(ds_dict.keys()) > 0: if 'CLASSES' in ds_dict.keys(): CLASSES = ds_dict['CLASSES'] @@ -479,13 +478,11 @@ def __init__(self, sets, obj_resize, **ds_dict): TRAIN_SAME_AS_TEST = ds_dict['TRAIN_SAME_AS_TEST'] if 'RAND_OUTLIER' in ds_dict.keys(): RAND_OUTLIER = ds_dict['RAND_OUTLIER'] - if 'URL' in ds_dict.keys(): - URL = ds_dict['URL'] self.dataset_dir = 'data/WillowObject' if not os.path.exists(ROOT_DIR): assert ROOT_DIR == dataset_cfg.WillowObject.ROOT_DIR, 'you should not change ROOT_DIR unless the data have been manually downloaded' - self.download(url=URL) + self.download(url='http://www.di.ens.fr/willow/research/graphlearning/WILLOW-ObjectClass_dataset.zip') if not os.path.exists(self.dataset_dir): os.makedirs(self.dataset_dir) @@ -623,23 +620,23 @@ def process(self): if self.sets == 'train': for x in range(len(self.mat_list)): for name in self.mat_list[x]: - tmp = os.path.split(str(name)) + tmp = str(name).split('/') objID = tmp[-1].split('.')[0] train_list.append(objID) for x in range(len(mat_list_)): for name in mat_list_[x]: - tmp = os.path.split(str(name)) + tmp = str(name).split('/') objID = tmp[-1].split('.')[0] test_list.append(objID) else: for x in range(len(self.mat_list)): for name in self.mat_list[x]: - tmp = os.path.split(str(name)) + tmp = str(name).split('/') objID = tmp[-1].split('.')[0] test_list.append(objID) for x in range(len(mat_list_)): for name in mat_list_[x]: - tmp = os.path.split(str(name)) 
+ tmp = str(name).split('/') objID = tmp[-1].split('.')[0] train_list.append(objID) str1 = json.dumps(train_list) @@ -656,9 +653,9 @@ def process(self): for x in range(len(data_list)): for name in data_list[x]: - tmp = os.path.split(str(name)) + tmp = str(name).split('/') objID = tmp[-1].split('.')[0] - cls = os.path.split(tmp[0])[-1] + cls = tmp[3] annotations = self.__get_anno_dict(name, cls) data_dict[objID] = annotations @@ -867,9 +864,9 @@ def process(self): for x in range(len(data_list)): for name in data_list[x]: - tmp = os.path.split(str(name)) + tmp = str(name).split('/') objID = tmp[-1].split('.')[0] - cls = os.path.split(tmp[0])[-1] + cls = tmp[3] annotations = self.__get_anno_dict(name, cls) ID = objID + '_' + cls data_dict[ID] = annotations @@ -1001,7 +998,6 @@ def __init__(self, sets, obj_resize, **ds_dict): CLASSES = dataset_cfg.IMC_PT_SparseGM.CLASSES ROOT_DIR_NPZ = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_NPZ ROOT_DIR_IMG = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_IMG - URL = 'https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1Po9pRMWXTqKK2ABPpVmkcsOq-6K_2v-B' if len(ds_dict.keys()) > 0: if 'MAX_KPT_NUM' in ds_dict.keys(): MAX_KPT_NUM = ds_dict['MAX_KPT_NUM'] @@ -1011,20 +1007,17 @@ def __init__(self, sets, obj_resize, **ds_dict): ROOT_DIR_NPZ = ds_dict['ROOT_DIR_NPZ'] if 'ROOT_DIR_IMG' in ds_dict.keys(): ROOT_DIR_IMG = ds_dict['ROOT_DIR_IMG'] - if 'URL' in ds_dict.keys(): - URL = ds_dict['URL'] self.dataset_dir = 'data/IMC-PT-SparseGM' if not os.path.exists(ROOT_DIR_IMG): assert ROOT_DIR_IMG == dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_IMG, 'you should not change ROOT_DIR_IMG or ROOT_DIR_NPZ unless the data have been manually downloaded' assert ROOT_DIR_NPZ == dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_NPZ, 'you should not change ROOT_DIR_IMG or ROOT_DIR_NPZ unless the data have been manually downloaded' - self.download(url=URL) + self.download(url='https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1Po9pRMWXTqKK2ABPpVmkcsOq-6K_2v-B') if 
not os.path.exists(self.dataset_dir): os.makedirs(self.dataset_dir) self.sets = sets self.classes = CLASSES[sets] - self.class_dict = CLASSES self.max_kpt_num = MAX_KPT_NUM self.suffix = 'imcpt-' + str(MAX_KPT_NUM) @@ -1088,9 +1081,9 @@ def process(self): if not os.path.exists(img_file): total_cls = [] - for cls in self.class_dict['train']: + for cls in dataset_cfg.IMC_PT_SparseGM.CLASSES['train']: total_cls.append(cls) - for cls in self.class_dict['test']: + for cls in dataset_cfg.IMC_PT_SparseGM.CLASSES['test']: total_cls.append(cls) total_img_lists = [np.load(self.root_path_npz / cls / 'img_info.npz')['img_name'].tolist() @@ -1163,19 +1156,20 @@ class CUB2011: :param sets: str, problem set, ``'train'`` for training set and ``'test'`` for testing set :param obj_resize: tuple, resized image size - :param ds_dict: settings of dataset, containing at most 1 params(key) for CUB2011: + :param ds_dict: settings of dataset, containing at most 2 params(keys) for CUB2011: * **ROOT_DIR**: str, directory of data + + * **CLS_SPLIT**: str, ``'ori'`` (original split), ``'sup'`` (super class) or ``'all'`` (all birds as one class) """ def __init__(self, sets, obj_resize, **ds_dict): CLS_SPLIT = dataset_cfg.CUB2011.CLASS_SPLIT ROOT_DIR = dataset_cfg.CUB2011.ROOT_DIR - URL = 'https://drive.google.com/u/0/uc?export=download&confirm=B8eu&id=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45' if len(ds_dict.keys()) > 0: + if 'CLS_SPLIT' in ds_dict.keys(): + CLS_SPLIT = ds_dict['CLS_SPLIT'] if 'ROOT_DIR' in ds_dict.keys(): ROOT_DIR = ds_dict['ROOT_DIR'] - if 'URL' in ds_dict.keys(): - URL = ds_dict['URL'] self.set_data = {'train': [], 'test': []} self.classes = [] @@ -1183,14 +1177,14 @@ def __init__(self, sets, obj_resize, **ds_dict): self._set_pairs = {} self._set_mask = {} self.cls_split = CLS_SPLIT - self.suffix = 'cub2011' + self.suffix = 'cub2011-' + CLS_SPLIT self.rootpath = ROOT_DIR self.dataset_dir = 'data/CUB_200_2011' if not os.path.exists(ROOT_DIR): assert ROOT_DIR == 
dataset_cfg.CUB2011.ROOT_DIR, 'you should not change ROOT_DIR unless the data have been manually downloaded' - self.download(url=URL) + self.download(url='https://drive.google.com/u/0/uc?export=download&confirm=B8eu&id=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45') if not os.path.exists(self.dataset_dir): os.makedirs(self.dataset_dir) @@ -1225,12 +1219,45 @@ def __init__(self, sets, obj_resize, **ds_dict): test_set.append(img_idx) self.set_data['train'].append(train_set) self.set_data['test'].append(test_set) + elif self.cls_split == 'sup': + super_classes = [v.split('_')[-1] for v in classes.values()] + self.classes = list(set(super_classes)) + for cls in self.classes: + self.set_data['train'].append([]) + self.set_data['test'].append([]) + for class_idx in sorted(classes): + supcls_idx = self.classes.index(classes[class_idx].split('_')[-1]) + train_set = [] + test_set = [] + for img_idx in class2img[class_idx]: + if train_split[img_idx] == '1': + train_set.append(img_idx) + else: + test_set.append(img_idx) + self.set_data['train'][supcls_idx] += train_set + self.set_data['test'][supcls_idx] += test_set + elif self.cls_split == 'all': + self.classes.append('cub2011') + self.set_data['train'].append([]) + self.set_data['test'].append([]) + for class_idx in sorted(classes): + train_set = [] + test_set = [] + for img_idx in class2img[class_idx]: + if train_split[img_idx] == '1': + train_set.append(img_idx) + else: + test_set.append(img_idx) + self.set_data['train'][0] += train_set + self.set_data['test'][0] += test_set + else: + raise ValueError('Unknown CUB2011.CLASS_SPLIT {}'.format(self.cls_split)) self.sets = sets self.obj_resize = obj_resize self.process() - def download(self, url=None, retries=50): + def download(self, url=None, retries=10): r""" Automatically download CUB2011 dataset. 
diff --git a/pygmtools/dataset_config.py b/pygmtools/dataset_config.py index 970cbbcb..22a73996 100644 --- a/pygmtools/dataset_config.py +++ b/pygmtools/dataset_config.py @@ -35,7 +35,7 @@ # CUB2011 dataset __C.CUB2011 = edict() __C.CUB2011.ROOT_DIR = 'data/CUB_200_2011' -__C.CUB2011.CLASS_SPLIT = 'ori' # choose from 'ori' (original split), 'sup' (super class) or 'all' (all birds as one class), only support 'ori' +__C.CUB2011.CLASS_SPLIT = 'ori' # choose from 'ori' (original split), 'sup' (super class) or 'all' (all birds as one class) # SWPair-71 Dataset __C.SPair = edict() diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 2b4d3404..bdf26890 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -11,7 +11,7 @@ import pygmtools as pygm from pygmtools.dataset_config import dataset_cfg from random import choice -import os + # Test dataset download and preprocess, and data fetch and evaluation def _test_benchmark(name, sets, problem, filter, **ds_dict): @@ -19,15 +19,13 @@ def _test_benchmark(name, sets, problem, filter, **ds_dict): if sets == 'test': num = 2 if benchmark.problem == '2GM' else 3 _test_get_data(benchmark, num) - os.remove(benchmark.data_list_path) - os.remove(benchmark.data_path) # Test data fetch and evaluation def _test_get_data(benchmark, num): - data_list, perm_dict, ids = benchmark.rand_get_data(cls=benchmark.classes[0], num=num) rand_data = benchmark.rand_get_data(num=num) assert rand_data is not None + data_list, perm_dict, ids = benchmark.rand_get_data(cls=benchmark.classes[0], num=num) if num == 2: data_length = benchmark.compute_length(num=num) @@ -49,15 +47,15 @@ def _test_get_data(benchmark, num): pred_dict['cls'] = cls pred_dict['perm_mat'] = perm_mat pred.append(pred_dict) + result = benchmark.eval(prediction=pred, classes=[benchmark.classes[0]], verbose=True) + # assert result['mean']['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' 
result_cls = benchmark.eval_cls(prediction=pred, cls=benchmark.classes[0], verbose=True) - assert result_cls['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' + # assert result_cls['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' - result = benchmark.eval(prediction=pred, classes=[benchmark.classes[0]], verbose=True) - assert result['mean']['f1'] == 1, f'Accuracy should be 1, something wrong in {benchmark.name} dataset test.' # Entry function def test_dataset_and_benchmark(): - dataset_name_list = ['PascalVOC', 'WillowObject', 'SPair71k', 'IMC_PT_SparseGM', 'CUB2011'] + dataset_name_list = ['PascalVOC', 'WillowObject', 'SPair71k', 'CUB2011'] problem_type_list = ['2GM', 'MGM'] set_list = ['train', 'test'] filter_list = ['intersection', 'inclusion', 'unfiltered'] @@ -78,7 +76,6 @@ def test_dataset_and_benchmark(): willow_cfg_dict['SPLIT_OFFSET'] = dataset_cfg.WillowObject.SPLIT_OFFSET willow_cfg_dict['TRAIN_SAME_AS_TEST'] = dataset_cfg.WillowObject.TRAIN_SAME_AS_TEST willow_cfg_dict['RAND_OUTLIER'] = dataset_cfg.WillowObject.RAND_OUTLIER - willow_cfg_dict['URL'] = 'https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=18AvGwkuhnih5bFDjfJK5NYM16LvDfwW_' dict_list.append(willow_cfg_dict) spair_cfg_dict = dict() @@ -91,18 +88,18 @@ def test_dataset_and_benchmark(): imcpt_cfg_dict = dict() imcpt_cfg_dict['MAX_KPT_NUM'] = dataset_cfg.IMC_PT_SparseGM.MAX_KPT_NUM - imcpt_cfg_dict['CLASSES'] = {'train': ['brandenburg_gate'], - 'test': ['reichstag']} + imcpt_cfg_dict['CLASSES'] = dataset_cfg.IMC_PT_SparseGM.CLASSES imcpt_cfg_dict['ROOT_DIR_NPZ'] = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_NPZ imcpt_cfg_dict['ROOT_DIR_IMG'] = dataset_cfg.IMC_PT_SparseGM.ROOT_DIR_IMG - imcpt_cfg_dict['URL'] = 'https://drive.google.com/u/0/uc?id=1bisri2Ip1Of3RsUA8OBrdH5oa6HlH3k-&export=download' dict_list.append(imcpt_cfg_dict) cub_cfg_dict = dict() + cub_cfg_dict['CLS_SPLIT'] = 'sup' cub_cfg_dict['ROOT_DIR'] = 
dataset_cfg.CUB2011.ROOT_DIR - cub_cfg_dict['URL'] = 'https://drive.google.com/u/0/uc?id=1fcN3m2PmQF7rMQGPxldEICU8CtJ0-F-z&export=download' dict_list.append(cub_cfg_dict) + dict_list.append(dict()) + for i, dataset_name in enumerate(dataset_name_list): for set in set_list: for problem_type in problem_type_list: @@ -111,7 +108,11 @@ def test_dataset_and_benchmark(): continue if filter == 'inclusion' and problem_type == 'MGM': continue + _test_benchmark(dataset_name, set, problem_type, filter, **dict_list[-1]) _test_benchmark(dataset_name, set, problem_type, filter, **dict_list[i]) + if i == 4: + dict_list[i]['CLS_SPLIT'] = 'all' + _test_benchmark(dataset_name, set, problem_type, filter, **dict_list[i]) if __name__ == '__main__': From 42ce297c6e4587668c0d97e2f94bd9716df18bf7 Mon Sep 17 00:00:00 2001 From: roger <18309862+rogerwwww@users.noreply.github.com> Date: Sat, 17 Dec 2022 23:03:59 +0800 Subject: [PATCH 6/7] add eps in cao_fast (fix numerical issue) --- pygmtools/jittor_backend.py | 2 +- pygmtools/numpy_backend.py | 2 +- pygmtools/paddle_backend.py | 2 +- pygmtools/pytorch_backend.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pygmtools/jittor_backend.py b/pygmtools/jittor_backend.py index e0d783d0..a09230b3 100644 --- a/pygmtools/jittor_backend.py +++ b/pygmtools/jittor_backend.py @@ -420,7 +420,7 @@ def _comp_aff_score(x, k): idx[i].append(ix[0].item() if ix.shape[0]>1 else ix.item()) idx = jt.Var(idx) - assert jt.all(score_combo_max >= score_ori), jt.min(score_combo_max - score_ori) + assert jt.all(score_combo_max + 1e-4 >= score_ori), jt.min(score_combo_max - score_ori) X_upt = X_combo[mask1, mask2, idx, :, :] X = X_upt * X_mask + X_upt.transpose(0, 1).transpose(2, 3) * X_mask.transpose(0, 1) + X * (1 - X_mask - X_mask.transpose(0, 1)) assert jt.all(X.transpose(0, 1).transpose(2, 3) == X) diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py index fbea2565..1a9fcf5c 100644 --- a/pygmtools/numpy_backend.py +++ 
b/pygmtools/numpy_backend.py @@ -443,7 +443,7 @@ def _comp_aff_score(x, k): idx = np.argmax(score_combo,axis=-1) score_combo = np.max(score_combo, axis=-1) - assert np.all(score_combo >= score_ori), np.min(score_combo - score_ori) + assert np.all(score_combo + 1e-4 >= score_ori), np.min(score_combo - score_ori) X_upt = X_combo[mask1, mask2, idx, :, :] X = X_upt * X_mask + X_upt.swapaxes(0,1).swapaxes(2,3) * X_mask.swapaxes(0,1) + X * (1 - X_mask - X_mask.swapaxes(0, 1)) assert np.all(X.swapaxes(0,1).swapaxes(2,3) == X) diff --git a/pygmtools/paddle_backend.py b/pygmtools/paddle_backend.py index 358aacb4..e47d262a 100644 --- a/pygmtools/paddle_backend.py +++ b/pygmtools/paddle_backend.py @@ -431,7 +431,7 @@ def _comp_aff_score(x, k): idx = paddle.argmax(score_combo, axis=-1) score_combo = paddle.max(score_combo, axis=-1) - # assert paddle.all(score_combo >= score_ori), paddle.min(score_combo - score_ori) + assert paddle.all(score_combo + 1e-4 >= score_ori), paddle.min(score_combo - score_ori) X_upt = X_combo[mask1, mask2, idx] X = X_upt * X_mask + X_upt.transpose((1, 0, 3, 2))* X_mask.transpose((1, 0, 2, 3)) + X * (1 - X_mask - X_mask.transpose((1, 0, 2, 3))) assert paddle.all(X.transpose((1, 0, 3, 2)) == X) diff --git a/pygmtools/pytorch_backend.py b/pygmtools/pytorch_backend.py index 8a4b8b1d..6f155e2d 100644 --- a/pygmtools/pytorch_backend.py +++ b/pygmtools/pytorch_backend.py @@ -431,7 +431,7 @@ def _comp_aff_score(x, k): score_combo, idx = torch.max(score_combo, dim=-1) - assert torch.all(score_combo >= score_ori), torch.min(score_combo - score_ori) + assert torch.all(score_combo + 1e-4 >= score_ori), torch.min(score_combo - score_ori) X_upt = X_combo[mask1, mask2, idx, :, :] X = X_upt * X_mask + X_upt.transpose(0, 1).transpose(2, 3) * X_mask.transpose(0, 1) + X * (1 - X_mask - X_mask.transpose(0, 1)) assert torch.all(X.transpose(0, 1).transpose(2, 3) == X) From 421e94cfd6eadfd6103a2bb7f88b09b93b99421a Mon Sep 17 00:00:00 2001 From: roger 
<18309862+rogerwwww@users.noreply.github.com> Date: Sun, 18 Dec 2022 14:21:03 +0800 Subject: [PATCH 7/7] fix skipped test code --- tests/test_multi_graph_solvers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_multi_graph_solvers.py b/tests/test_multi_graph_solvers.py index 1f7629da..29165520 100644 --- a/tests/test_multi_graph_solvers.py +++ b/tests/test_multi_graph_solvers.py @@ -186,13 +186,13 @@ def test_gamgm(): num_nodes = 5 num_graphs = 10 # test without outliers - # _test_mgm_solver_on_isomorphic_graphs(num_graphs, num_nodes, 10, pygm.gamgm, 'kb-qap', { - # 'sk_init_tau': [0.5, 0.1], - # 'sk_min_tau': [0.1, 0.05], - # 'param_lambda': [0.1, 0.5], - # 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn], - # 'verbose': [True] - # }, ['pytorch', 'numpy', 'paddle', 'jittor']) + _test_mgm_solver_on_isomorphic_graphs(num_graphs, num_nodes, 10, pygm.gamgm, 'kb-qap', { + 'sk_init_tau': [0.5, 0.1], + 'sk_min_tau': [0.1, 0.05], + 'param_lambda': [0.1, 0.5], + 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn], + 'verbose': [True] + }, ['pytorch', 'numpy', 'paddle', 'jittor']) # test with outliers _test_mgm_solver_on_isomorphic_graphs(num_graphs, num_nodes, 10, pygm.gamgm, 'kb-qap', {