diff --git a/.gitignore b/.gitignore
index c6c6ba6d..9c15768f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,7 +25,9 @@ output/*.html
 output/*/index.html
 
 # Sphinx documentation
-doc/_build
+/doc/_build/
+/doc/examples/
+/doc/backrefs/
 
 # Vim swap files
 .*.swp
diff --git a/.readthedocs.yml b/.readthedocs.yml
index c6869239..5cbbda17 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -5,5 +5,4 @@ python:
   version: 3
   pip_install: true
   extra_requirements:
-    - alldeps
-    - doc
+    - dev
diff --git a/.travis.yml b/.travis.yml
index 9cf279cc..6b472238 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,7 +19,8 @@ matrix:
 addons:
   apt:
     packages:
-      - libqt5gui5  # pyqt5>5.11 fails to load the xcb platform plugin without it
+      - libqt5gui5  # pyqt5>5.11 otherwise cannot load the xcb platform plugin
+      - libflann-dev
 
 install:
   - pip install -U --upgrade-strategy eager .[alldeps,test,doc]
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 63604dcf..edefd8f7 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -14,7 +14,20 @@ The package can be set up (ideally in a virtual environment) for local
 development with the following::
 
    $ git clone https://github.com/epfl-lts2/pygsp.git
-   $ pip install -U -e pygsp[alldeps,test,doc,pkg]
+   $ pip install -U -e pygsp[dev]
+
+The ``dev`` "extras requirement" ensures that dependencies required for
+development (to run the test suite and build the documentation) are installed.
+Only `graph-tool <https://graph-tool.skewed.de>`_ will be missing: install it
+manually as it cannot be installed by pip.
 
 You can improve or add functionality in the ``pygsp`` folder, along with
 corresponding unit tests in ``pygsp/tests/test_*.py`` (with reasonable
@@ -36,6 +49,13 @@ documentation with the following (enforced by Travis CI)::
 
 Check the generated coverage report at ``htmlcov/index.html`` to make sure the
 tests reasonably cover the changes you've introduced.
 
+To iterate faster, you can partially run the test suite, at various degrees of
+granularity, as follows::
+
+   $ python -m unittest pygsp.tests.test_docstrings.suite_reference
+   $ python -m unittest pygsp.tests.test_graphs.TestImportExport
+   $ python -m unittest pygsp.tests.test_graphs.TestImportExport.test_save_load
 
 Making a release
 ----------------
diff --git a/README.rst b/README.rst
index 04299075..d1caf309 100644
--- a/README.rst
+++ b/README.rst
@@ -2,6 +2,14 @@
 PyGSP: Graph Signal Processing in Python
 ========================================
 
+The PyGSP is a Python package to ease
+`Signal Processing on Graphs `_.
+The documentation is available on
+`Read the Docs `_
+and development takes place on
+`GitHub <https://github.com/epfl-lts2/pygsp>`_.
+A (mostly unmaintained) `Matlab version `_ exists.
+
 +-----------------------------------+
 | |doc| |pypi| |conda| |binder|     |
 +-----------------------------------+
@@ -26,18 +34,11 @@ PyGSP: Graph Signal Processing in Python
    :target: https://coveralls.io/github/epfl-lts2/pygsp
 .. |github| image:: https://img.shields.io/github/stars/epfl-lts2/pygsp.svg?style=social
    :target: https://github.com/epfl-lts2/pygsp
-.. |binder| image:: https://mybinder.org/badge.svg
+.. |binder| image:: https://mybinder.org/badge_logo.svg
    :target: https://mybinder.org/v2/gh/epfl-lts2/pygsp/master?filepath=playground.ipynb
 .. |conda| image:: https://anaconda.org/conda-forge/pygsp/badges/installer/conda.svg
    :target: https://anaconda.org/conda-forge/pygsp
 
-The PyGSP is a Python package to ease
-`Signal Processing on Graphs `_.
-The documentation is available on
-`Read the Docs `_
-and development takes place on
-`GitHub `_.
-A (mostly unmaintained) `Matlab version `_ exists.
 
 The PyGSP facilitates a wide variety of operations on graphs, like computing
 their Fourier basis, filtering or interpolating signals, plotting graphs,
@@ -53,6 +54,15 @@ exponential window; and Gabor filters. Despite all the pre-defined models, you
 can easily use a custom graph by defining its adjacency matrix, and a custom
 filter bank by defining a set of functions in the spectral domain.
 
+While NetworkX_ and graph-tool_ are tools to analyze the topology of graphs,
+the aim of the PyGSP is to analyze graph signals, also known as features or
+properties (i.e., not the graph itself).
+Those three tools are complementary and work well together with the provided
+import / export facility.
+
+.. _NetworkX: https://networkx.github.io
+.. _graph-tool: https://graph-tool.skewed.de
+
 The following demonstrates how to instantiate a graph and a filter, the two
 main objects of the package.
 
@@ -100,13 +110,16 @@ The PyGSP is available on PyPI::
 
     $ pip install pygsp
 
-Note that you will need a recent version of ``pip`` and ``setuptools``. Please
-run ``pip install --upgrade pip setuptools`` if you get any installation error.
-
 The PyGSP is available on `conda-forge `_::
 
     $ conda install -c conda-forge pygsp
 
+The PyGSP is available in the `Arch User Repository `_::
+
+    $ git clone https://aur.archlinux.org/python-pygsp.git
+    $ cd python-pygsp
+    $ makepkg -csi
+
 Contributing
 ------------
diff --git a/doc/conf.py b/doc/conf.py
index 37865e04..569c1f04 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -2,11 +2,13 @@
 
 import pygsp
 
-extensions = ['sphinx.ext.viewcode',
-              'sphinx.ext.autosummary',
-              'sphinx.ext.mathjax',
-              'sphinx.ext.inheritance_diagram',
-              'sphinxcontrib.bibtex']
+extensions = [
+    'sphinx.ext.viewcode',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.mathjax',
+    'sphinx.ext.inheritance_diagram',
+    'sphinxcontrib.bibtex',
+]
 
 extensions.append('sphinx.ext.autodoc')
 autodoc_default_flags = ['members', 'undoc-members']
@@ -37,6 +39,17 @@
 from pygsp import graphs, filters, utils, plotting
 """
 
+extensions.append('sphinx_gallery.gen_gallery')
+sphinx_gallery_conf = {
+    'examples_dirs': '../examples',
+    'gallery_dirs': 'examples',
+    'filename_pattern': '/',
+    'reference_url': {'pygsp': None},
+    'backreferences_dir': 'backrefs',
+    'doc_module': 'pygsp',
+    'show_memory': True,
+}
+
 exclude_patterns = ['_build']
 source_suffix = '.rst'
 master_doc = 'index'
diff --git a/doc/history.rst b/doc/history.rst
index fa6cd4d6..f87b7a9d 100644
--- a/doc/history.rst
+++ b/doc/history.rst
@@ -24,6 +24,27 @@ History
 * New implementation of the Sensor graph that is simpler and scales better.
 * A new learning module with three functions to solve standard semi-supervised
   classification and regression problems.
+* A much improved, fixed, documented, and tested NNGraph. The user can now
+  select the backend and similarity kernel. The radius can be estimated and
+  features standardized. (PR #43)
+* Import and export graphs and their signals to NetworkX and graph-tool.
+* Save and load graphs and their signals to / from GraphML, GML, and GEXF.
+* Documentation: path graph linked to DCT, ring graph linked to DFT.
+* We now have a gallery of examples! That is convenient for users to get a
+  taste of what the library can do, and to start working from a code snippet.
+* Merged all the extra requirements in a single dev requirement.
 
 Experimental filter API (to be tested and validated):
 
@@ -89,7 +110,7 @@ The following packages were made optional dependencies:
   workflow, it's not necessary for users who only want to process data without
   plotting graphs, signals and filters.
 * pyflann, as it is only used for approximate kNN. The problem was that the
-  source distribution would not build for Windows. On conda-forge, (py)flann
+  source distribution would not build for Windows. On conda-forge, (py)flann
   is not built for Windows either.
 
 Moreover, matplotlib is now the default drawing backend. It's well integrated
diff --git a/doc/index.rst b/doc/index.rst
index 4fab6d08..1ebdcf59 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -5,6 +5,7 @@ Home
 
    tutorials/index
+   examples/index
    reference/index
    contributing
    history
diff --git a/doc/reference/index.rst b/doc/reference/index.rst
index 4e58903d..71422b6b 100644
--- a/doc/reference/index.rst
+++ b/doc/reference/index.rst
@@ -1,6 +1,6 @@
-===============
-Reference guide
-===============
+=============
+API reference
+=============
 
 .. automodule:: pygsp
diff --git a/doc/references.bib b/doc/references.bib
index 74580d5e..de0ade43 100644
--- a/doc/references.bib
+++ b/doc/references.bib
@@ -77,7 +77,7 @@ @article{klein1993resistance
   publisher={Springer}
 }
 
-@article{strang1999discrete,
+@article{strang1999dct,
   title={The discrete cosine transform},
   author={Strang, Gilbert},
   journal={SIAM review},
@@ -85,7 +85,8 @@ @article{strang1999discrete
   number={1},
   pages={135--147},
   year={1999},
-  publisher={SIAM}
+  publisher={SIAM},
+  url={https://sci-hub.tw/10.1137/S0036144598336745},
 }
 
 @article{shuman2013spectrum,
diff --git a/doc/tutorials/optimization.rst b/doc/tutorials/optimization.rst
index 7127a9c5..b3740d85 100644
--- a/doc/tutorials/optimization.rst
+++ b/doc/tutorials/optimization.rst
@@ -85,7 +85,7 @@ We start with the graph TV regularization. We will use the :class:`pyunlocbox.so
 >>> prob1 = pyunlocbox.solvers.solve([d, r, f], solver=solver,
 ...                                  x0=x0, rtol=0, maxit=1000)
 Solution found after 1000 iterations:
-    objective function f(sol) = 2.250584e+02
+    objective function f(sol) = 2.256055e+02
     stopping criterion: MAXIT
 >>>
 >>> fig, ax = G.plot(prob1['sol'])
@@ -107,7 +107,7 @@ This figure shows the label signal recovered by graph total variation regulariza
 >>> prob2 = pyunlocbox.solvers.solve([r, f], solver=solver,
 ...                                  x0=x0, rtol=0, maxit=1000)
 Solution found after 1000 iterations:
-    objective function f(sol) = 6.504290e+01
+    objective function f(sol) = 4.376481e+01
     stopping criterion: MAXIT
 >>>
 >>> fig, ax = G.plot(prob2['sol'])
diff --git a/examples/README.txt b/examples/README.txt
new file mode 100644
index 00000000..b90c0e1c
--- /dev/null
+++ b/examples/README.txt
@@ -0,0 +1,3 @@
+========
+Examples
+========
diff --git a/examples/eigenvalue_concentration.py b/examples/eigenvalue_concentration.py
new file mode 100644
index 00000000..c372bf2a
--- /dev/null
+++ b/examples/eigenvalue_concentration.py
@@ -0,0 +1,38 @@
+r"""
+Concentration of the eigenvalues
+================================
+
+The eigenvalues of the graph Laplacian concentrate to the same value as the
+graph becomes fully connected.
+"""
+
+import numpy as np
+from matplotlib import pyplot as plt
+import pygsp as pg
+
+n_neighbors = [1, 2, 5, 8]
+fig, axes = plt.subplots(3, len(n_neighbors), figsize=(15, 8))
+
+for k, ax in zip(n_neighbors, axes.T):
+
+    graph = pg.graphs.Ring(17, k=k)
+    graph.compute_fourier_basis()
+    graph.plot(graph.U[:, 1], ax=ax[0])
+    ax[0].axis('equal')
+    ax[1].spy(graph.W)
+    ax[2].plot(graph.e, '.')
+    ax[2].set_title('k={}'.format(k))
+    #graph.set_coordinates('line1D')
+    #graph.plot(graph.U[:, :4], ax=ax[3], title='')
+
+    # Check that the DFT matrix is an eigenbasis of the Laplacian.
+    U = np.fft.fft(np.identity(graph.n_vertices))
+    LambdaM = (graph.L.todense().dot(U)) / (U + 1e-15)
+    # Eigenvalues should be real.
+    assert np.all(np.abs(np.imag(LambdaM)) < 1e-10)
+    LambdaM = np.real(LambdaM)
+    # Check that the eigenvectors are really eigenvectors of the Laplacian.
+    Lambda = np.mean(LambdaM, axis=0)
+    assert np.all(np.abs(LambdaM - Lambda) < 1e-10)
+
+fig.tight_layout()
diff --git a/examples/filtering.py b/examples/filtering.py
new file mode 100644
index 00000000..62256685
--- /dev/null
+++ b/examples/filtering.py
@@ -0,0 +1,66 @@
+r"""
+Filtering a graph signal
+========================
+
+A graph signal is filtered by transforming it to the spectral domain (via the
+Fourier transform), performing a point-wise multiplication (motivated by the
+convolution theorem), and transforming it back to the vertex domain (via the
+inverse graph Fourier transform).
+
+.. note::
+
+    In practice, filtering is implemented in the vertex domain to avoid the
+    computationally expensive graph Fourier transform. To do so, filters are
+    implemented as polynomials of the eigenvalues / Laplacian. Hence,
+    filtering a signal reduces to multiplications with sparse matrices (the
+    graph Laplacian).
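+
+    A minimal sketch of that idea (not the PyGSP implementation; ``coeffs``
+    stands for hypothetical polynomial coefficients of the kernel, ``L`` for
+    a sparse Laplacian, and ``x`` for a signal)::
+
+        y = coeffs[0] * x
+        Lk_x = x
+        for c in coeffs[1:]:
+            Lk_x = L.dot(Lk_x)  # one sparse matrix-vector product per order
+            y = y + c * Lk_x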
+
+"""
+
+import numpy as np
+from matplotlib import pyplot as plt
+import pygsp as pg
+
+G = pg.graphs.Sensor(seed=42)
+G.compute_fourier_basis()
+
+#g = pg.filters.Rectangular(G, band_max=0.2)
+g = pg.filters.Expwin(G, band_max=0.5)
+
+fig, axes = plt.subplots(1, 3, figsize=(12, 4))
+fig.subplots_adjust(hspace=0.5)
+
+x = np.random.RandomState(1).normal(size=G.N)
+#x = np.random.RandomState(42).uniform(-1, 1, size=G.N)
+x = 3 * x / np.linalg.norm(x)
+y = g.filter(x)
+x_hat = G.gft(x).squeeze()
+y_hat = G.gft(y).squeeze()
+
+limits = [x.min(), x.max()]
+
+G.plot(x, limits=limits, ax=axes[0], title='input signal $x$ in the vertex domain')
+axes[0].text(0, -0.1, '$x^T L x = {:.2f}$'.format(G.dirichlet_energy(x)))
+axes[0].set_axis_off()
+
+g.plot(ax=axes[1], alpha=1)
+line_filt = axes[1].lines[-2]
+line_in, = axes[1].plot(G.e, np.abs(x_hat), '.-')
+line_out, = axes[1].plot(G.e, np.abs(y_hat), '.-')
+#axes[1].set_xticks(range(0, 16, 4))
+axes[1].set_xlabel(r'graph frequency $\lambda$')
+axes[1].set_ylabel(r'frequency content $\hat{x}(\lambda)$')
+axes[1].set_title(r'signals in the spectral domain')
+labels = [
+    r'input signal $\hat{x}$',
+    'kernel $g$',
+    r'filtered signal $\hat{y}$',
+]
+axes[1].legend([line_in, line_filt, line_out], labels, loc='upper right')
+
+G.plot(y, limits=limits, ax=axes[2], title='filtered signal $y$ in the vertex domain')
+axes[2].text(0, -0.1, '$y^T L y = {:.2f}$'.format(G.dirichlet_energy(y)))
+axes[2].set_axis_off()
+
+fig.tight_layout()
diff --git a/examples/fourier_basis.py b/examples/fourier_basis.py
new file mode 100644
index 00000000..1064c58f
--- /dev/null
+++ b/examples/fourier_basis.py
@@ -0,0 +1,42 @@
+r"""
+Fourier basis of graphs
+=======================
+
+The eigenvectors of the graph Laplacian form the Fourier basis.
+The eigenvalues are a measure of variation of their corresponding
+eigenvector. The lower the eigenvalue, the smoother the eigenvector. They are
+hence a measure of "frequency".
+
+In classical signal processing, Fourier modes are completely delocalized, like
+on the grid graph. For general graphs however, Fourier modes might be
+localized. See :attr:`pygsp.graphs.Graph.coherence`.
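+
+A quick numerical check of these claims, assuming a small graph ``G`` whose
+full Fourier basis has been computed (a sketch, not part of the plot below)::
+
+    G.compute_fourier_basis()
+    # The Fourier basis is orthonormal: U^T U = I.
+    np.allclose(G.U.T.dot(G.U), np.identity(G.N))
+    # Each eigenvalue is the variation (Dirichlet energy) of its eigenvector.
+    np.allclose(np.diag(G.U.T.dot(G.L.toarray()).dot(G.U)), G.e)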
+""" + +import numpy as np +from matplotlib import pyplot as plt +import pygsp as pg + +n_eigenvectors = 7 + +fig, axes = plt.subplots(2, 7, figsize=(15, 4)) + +def plot_eigenvectors(G, axes): + G.compute_fourier_basis(n_eigenvectors) + limits = [f(G.U) for f in (np.min, np.max)] + for i, ax in enumerate(axes): + G.plot(G.U[:, i], limits=limits, colorbar=False, vertex_size=50, ax=ax) + energy = abs(G.dirichlet_energy(G.U[:, i])) + ax.set_title(r'$u_{0}^\top L u_{0} = {1:.2f}$'.format(i+1, energy)) + ax.set_axis_off() + +G = pg.graphs.Grid2d(10, 10) +plot_eigenvectors(G, axes[0]) +fig.subplots_adjust(hspace=0.5, right=0.8) +cax = fig.add_axes([0.82, 0.60, 0.01, 0.26]) +fig.colorbar(axes[0, -1].collections[1], cax=cax, ticks=[-0.2, 0, 0.2]) + +G = pg.graphs.Sensor(seed=42) +plot_eigenvectors(G, axes[1]) +fig.subplots_adjust(hspace=0.5, right=0.8) +cax = fig.add_axes([0.82, 0.16, 0.01, 0.26]) +fig.colorbar(axes[1, -1].collections[1], cax=cax, ticks=[-0.4, 0, 0.4]) diff --git a/examples/fourier_transform.py b/examples/fourier_transform.py new file mode 100644 index 00000000..2589ef12 --- /dev/null +++ b/examples/fourier_transform.py @@ -0,0 +1,46 @@ +r""" +Fourier transform +================= + +The graph Fourier transform (:meth:`pygsp.graphs.Graph.gft`) transforms a +signal from the vertex domain to the spectral domain. The smoother the signal +(see :meth:`pygsp.graphs.Graph.dirichlet_energy`), the lower in the frequencies +its energy is concentrated. +""" + +import numpy as np +from matplotlib import pyplot as plt +import pygsp as pg + +G = pg.graphs.Sensor(seed=42) +G.compute_fourier_basis() + +scales = [10, 3, 0] +limit = 0.32 + +fig, axes = plt.subplots(2, len(scales), figsize=(12, 4)) +fig.subplots_adjust(hspace=0.5) + +x0 = np.random.RandomState(1).normal(size=G.N) +for i, scale in enumerate(scales): + g = pg.filters.Heat(G, scale) + x = g.filter(x0).squeeze() + x /= np.linalg.norm(x) + x_hat = G.gft(x).squeeze() + + assert np.all((-limit < x) & (x < limit)) + G.plot(x, limits=[-limit, limit], ax=axes[0, i]) + axes[0, i].set_axis_off() + axes[0, i].set_title('$x^T L x = {:.2f}$'.format(G.dirichlet_energy(x))) + + axes[1, i].plot(G.e, np.abs(x_hat), '.-') + axes[1, i].set_xticks(range(0, 16, 4)) + axes[1, i].set_xlabel(r'graph frequency $\lambda$') + axes[1, i].set_ylim(-0.05, 0.95) + +axes[1, 0].set_ylabel(r'frequency content $\hat{x}(\lambda)$') + +# axes[0, 0].set_title(r'$x$: signal in the vertex domain') +# axes[1, 0].set_title(r'$\hat{x}$: signal in the spectral domain') + +fig.tight_layout() diff --git a/examples/heat_diffusion.py b/examples/heat_diffusion.py new file mode 100644 index 00000000..81701bbe --- /dev/null +++ b/examples/heat_diffusion.py @@ -0,0 +1,45 @@ +r""" +Heat diffusion on graphs +======================== + +Solve the heat equation by filtering the initial conditions with the heat +kernel. 
+
+"""
+
+import numpy as np
+from matplotlib import pyplot as plt
+import pygsp as pg
+
+n_side = 13
+G = pg.graphs.Grid2d(n_side)
+G.compute_fourier_basis()
+
+sources = [
+    (n_side//4 * n_side) + (n_side//4),
+    (n_side*3//4 * n_side) + (n_side*3//4),
+]
+x = np.zeros(G.n_vertices)
+x[sources] = 5
+
+times = [0, 5, 10, 20]
+
+fig, axes = plt.subplots(2, len(times), figsize=(12, 5))
+for i, t in enumerate(times):
+    g = pg.filters.Heat(G, scale=t)
+    title = r'$\hat{{f}}({0}) = g_{{1,{0}}} \odot \hat{{f}}(0)$'.format(t)
+    g.plot(alpha=1, ax=axes[0, i], title=title)
+    axes[0, i].set_xlabel(r'$\lambda$')
+#    axes[0, i].set_ylabel(r'$g(\lambda)$')
+    if i > 0:
+        axes[0, i].set_ylabel('')
+    y = g.filter(x)
+    line, = axes[0, i].plot(G.e, G.gft(y))
+    labels = [r'$\hat{{f}}({})$'.format(t), r'$g_{{1,{}}}$'.format(t)]
+    axes[0, i].legend([line, axes[0, i].lines[-3]], labels, loc='lower right')
+    G.plot(y, edges=False, highlight=sources, ax=axes[1, i], title=r'$f({})$'.format(t))
+    axes[1, i].set_aspect('equal', 'box')
+    axes[1, i].set_axis_off()
+
+fig.tight_layout()
diff --git a/examples/kernel_localization.py b/examples/kernel_localization.py
new file mode 100644
index 00000000..d9484fa9
--- /dev/null
+++ b/examples/kernel_localization.py
@@ -0,0 +1,40 @@
+r"""
+Kernel localization
+===================
+
+In classical signal processing, a filter can be translated in the vertex
+domain. We cannot do that on graphs. Instead, we can
+:meth:`~pygsp.filters.Filter.localize` a filter kernel. Note how on classic
+structures (like the ring), the localized kernel is the same everywhere, while
+it changes when localized on irregular graphs.
+"""
+
+import numpy as np
+from matplotlib import pyplot as plt
+import pygsp as pg
+
+fig, axes = plt.subplots(2, 4, figsize=(10, 4))
+
+graphs = [
+    pg.graphs.Ring(40),
+    pg.graphs.Sensor(64, seed=42),
+]
+
+locations = [0, 10, 20]
+
+for graph, axs in zip(graphs, axes):
+    graph.compute_fourier_basis()
+    g = pg.filters.Heat(graph)
+    g.plot(ax=axs[0], title='heat kernel')
+    axs[0].set_xlabel(r'eigenvalues $\lambda$')
+    axs[0].set_ylabel(r'$g(\lambda) = \exp \left( \frac{{-{}\lambda}}{{\lambda_{{max}}}} \right)$'.format(g.scale[0]))
+    maximum = 0
+    for loc in locations:
+        x = g.localize(loc)
+        maximum = np.maximum(maximum, x.max())
+    for loc, ax in zip(locations, axs[1:]):
+        graph.plot(g.localize(loc), limits=[0, maximum], highlight=loc, ax=ax,
+                   title=r'$g(L) \delta_{{{}}}$'.format(loc))
+        ax.set_axis_off()
+
+fig.tight_layout()
diff --git a/examples/random_walk.py b/examples/random_walk.py
new file mode 100644
index 00000000..9fc69456
--- /dev/null
+++ b/examples/random_walk.py
@@ -0,0 +1,67 @@
+r"""
+Random walks
+============
+
+Probability of a random walker to be on any given vertex after a given number
+of steps starting from a given distribution.
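+
+The walk is governed by the transition matrix :math:`P = D^{-1} W`, whose
+rows sum to one, and the distribution evolves as :math:`x_{t+1} = x_t P`,
+i.e., :math:`x_t = x_0 P^t`.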
+""" + +# sphinx_gallery_thumbnail_number = 2 + +import numpy as np +from scipy import sparse +from matplotlib import pyplot as plt +import pygsp as pg + +N = 7 +steps = [0, 1, 2, 3] + +graph = pg.graphs.Grid2d(N) +delta = np.zeros(graph.N) +delta[N//2*N + N//2] = 1 + +probability = sparse.diags(graph.dw**(-1)).dot(graph.W) + +fig, axes = plt.subplots(1, len(steps), figsize=(12, 3)) +for step, ax in zip(steps, axes): + state = (probability**step).__rmatmul__(delta) ## = delta @ probability**step + graph.plot(state, ax=ax, title=r'$\delta P^{}$'.format(step)) + ax.set_axis_off() + +fig.tight_layout() + +############################################################################### +# Stationary distribution. + +graphs = [ + pg.graphs.Ring(10), + pg.graphs.Grid2d(5), + pg.graphs.Comet(8, 4), + pg.graphs.BarabasiAlbert(20, seed=42), +] + +fig, axes = plt.subplots(1, len(graphs), figsize=(12, 3)) + +for graph, ax in zip(graphs, axes): + + if not hasattr(graph, 'coords'): + graph.set_coordinates(seed=10) + + P = sparse.diags(graph.dw**(-1)).dot(graph.W) + +# e, u = np.linalg.eig(P.T.toarray()) +# np.testing.assert_allclose(np.linalg.inv(u.T) @ np.diag(e) @ u.T, +# P.toarray(), atol=1e-10) +# np.testing.assert_allclose(np.abs(e[0]), 1) +# stationary = np.abs(u.T[0]) + + e, u = sparse.linalg.eigs(P.T, k=1, which='LR') + np.testing.assert_allclose(e, 1) + stationary = np.abs(u).squeeze() + assert np.all(stationary < 0.71) + + colorbar = False if type(graph) is pg.graphs.Ring else True + graph.plot(stationary, colorbar=colorbar, ax=ax, title='$xP = x$') + ax.set_axis_off() + +fig.tight_layout() diff --git a/examples/wave_propagation.py b/examples/wave_propagation.py new file mode 100644 index 00000000..ec69b73c --- /dev/null +++ b/examples/wave_propagation.py @@ -0,0 +1,45 @@ +r""" +Wave propagation on graphs +========================== + +Solve the wave equation by filtering the initial conditions with the wave +kernel. +""" + +from os import path + +import numpy as np +from matplotlib import pyplot as plt +import pygsp as pg + +n_side = 13 +G = pg.graphs.Grid2d(n_side) +G.compute_fourier_basis() + +sources = [ + (n_side//4 * n_side) + (n_side//4), + (n_side*3//4 * n_side) + (n_side*3//4), +] +x = np.zeros(G.n_vertices) +x[sources] = 5 + +times = [0, 5, 10, 20] + +fig, axes = plt.subplots(2, len(times), figsize=(12, 5)) +for i, t in enumerate(times): + g = pg.filters.Wave(G, time=t, speed=1) + title = r'$\hat{{f}}({0}) = g_{{1,{0}}} \odot \hat{{f}}(0)$'.format(t) + g.plot(alpha=1, ax=axes[0, i], title=title) + axes[0, i].set_xlabel(r'$\lambda$') +# axes[0, i].set_ylabel(r'$g(\lambda)$') + if i > 0: + axes[0, i].set_ylabel('') + y = g.filter(x) + line, = axes[0, i].plot(G.e, G.gft(y)) + labels = [r'$\hat{{f}}({})$'.format(t), r'$g_{{1,{}}}$'.format(t)] + axes[0, i].legend([line, axes[0, i].lines[-3]], labels, loc='lower right') + G.plot(y, edges=False, highlight=sources, ax=axes[1, i], title=r'$f({})$'.format(t)) + axes[1, i].set_aspect('equal', 'box') + axes[1, i].set_axis_off() + +fig.tight_layout() diff --git a/postBuild b/postBuild index 6eb53a4d..e1530e8e 100755 --- a/postBuild +++ b/postBuild @@ -1,3 +1,3 @@ #!/bin/bash # Tell https://mybinder.org to simply install the package. 
diff --git a/postBuild b/postBuild
index 6eb53a4d..e1530e8e 100755
--- a/postBuild
+++ b/postBuild
@@ -1,3 +1,3 @@
 #!/bin/bash
 # Tell https://mybinder.org to simply install the package.
-pip install .[alldeps]
+pip install .[dev]
diff --git a/pygsp/__init__.py b/pygsp/__init__.py
index c75afd69..5a51c679 100644
--- a/pygsp/__init__.py
+++ b/pygsp/__init__.py
@@ -25,6 +25,7 @@
     'plotting',
     'reduction',
     'features',
+    'learning',
     'optimization',
     'utils',
 ]
diff --git a/pygsp/_nearest_neighbor.py b/pygsp/_nearest_neighbor.py
new file mode 100644
index 00000000..daa87c60
--- /dev/null
+++ b/pygsp/_nearest_neighbor.py
@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import division
+
+import numpy as np
+from scipy import sparse, spatial
+
+from pygsp import utils
+
+_logger = utils.build_logger(__name__)
+
+
+def _scipy_pdist(features, metric, order, kind, k, radius, params):
+    if params:
+        raise ValueError('unexpected parameters {}'.format(params))
+    metric = 'cityblock' if metric == 'manhattan' else metric
+    metric = 'chebyshev' if metric == 'max_dist' else metric
+    params = dict(metric=metric)
+    if metric == 'minkowski':
+        params['p'] = order
+    dist = spatial.distance.pdist(features, **params)
+    dist = spatial.distance.squareform(dist)
+    if kind == 'knn':
+        neighbors = np.argsort(dist)[:, :k+1]
+        distances = np.take_along_axis(dist, neighbors, axis=-1)
+    elif kind == 'radius':
+        distances = []
+        neighbors = []
+        for distance in dist:
+            neighbor = np.flatnonzero(distance < radius)
+            neighbors.append(neighbor)
+            distances.append(distance[neighbor])
+    return neighbors, distances
+
+
+def _scipy_kdtree(features, _, order, kind, k, radius, params):
+    if order is None:
+        raise ValueError('invalid metric for scipy-kdtree')
+    eps = params.pop('eps', 0)
+    tree = spatial.KDTree(features, **params)
+    params = dict(p=order, eps=eps)
+    if kind == 'knn':
+        params['k'] = k + 1
+    elif kind == 'radius':
+        params['k'] = None
+        params['distance_upper_bound'] = radius
+    distances, neighbors = tree.query(features, **params)
+    return neighbors, distances
+
+
+def _scipy_ckdtree(features, _, order, kind, k, radius, params):
+    if order is None:
+        raise ValueError('invalid metric for scipy-ckdtree')
+    eps = params.pop('eps', 0)
+    tree = spatial.cKDTree(features, **params)
+    params = dict(p=order, eps=eps, n_jobs=-1)
+    if kind == 'knn':
+        params['k'] = k + 1
+    elif kind == 'radius':
+        params['k'] = features.shape[0]  # number of vertices
+        params['distance_upper_bound'] = radius
+    distances, neighbors = tree.query(features, **params)
+    if kind == 'knn':
+        return neighbors, distances
+    elif kind == 'radius':
+        dist = []
+        neigh = []
+        for distance, neighbor in zip(distances, neighbors):
+            mask = (distance != np.inf)
+            dist.append(distance[mask])
+            neigh.append(neighbor[mask])
+        return neigh, dist
+
+
+def _flann(features, metric, order, kind, k, radius, params):
+    if metric == 'max_dist':
+        raise ValueError('flann gives wrong results for metric="max_dist".')
+    try:
+        import cyflann as cfl
+    except Exception as e:
+        raise ImportError('Cannot import cyflann. Choose another nearest '
+                          'neighbors backend or try to install it with '
+                          'pip (or conda) install cyflann. '
+                          'Original exception: {}'.format(e))
+    cfl.set_distance_type(metric, order=order)
+    index = cfl.FLANNIndex()
+    index.build_index(features, **params)
+    # I tried changing the algorithm and testing performance on huge matrices,
+    # but the default parameters seem to work best.
+    if kind == 'knn':
+        neighbors, distances = index.nn_index(features, k+1)
+        if metric == 'euclidean':
+            np.sqrt(distances, out=distances)
+        elif metric == 'minkowski':
+            np.power(distances, 1/order, out=distances)
+    elif kind == 'radius':
+        distances = []
+        neighbors = []
+        if metric == 'euclidean':
+            radius = radius**2
+        elif metric == 'minkowski':
+            radius = radius**order
+        n_vertices, _ = features.shape
+        for vertex in range(n_vertices):
+            neighbor, distance = index.nn_radius(features[vertex, :], radius)
+            distances.append(distance)
+            neighbors.append(neighbor)
+        if metric == 'euclidean':
+            distances = list(map(np.sqrt, distances))
+        elif metric == 'minkowski':
+            distances = list(map(lambda d: np.power(d, 1/order), distances))
+    index.free_index()
+    return neighbors, distances
+
+
+def _nmslib(features, metric, order, kind, k, _, params):
+    if kind == 'radius':
+        raise ValueError('nmslib does not support kind="radius".')
+    if metric == 'minkowski':
+        raise ValueError('nmslib does not support metric="minkowski".')
+    try:
+        import nmslib as nms
+    except Exception as e:
+        raise ImportError('Cannot import nmslib. Choose another nearest '
+                          'neighbors backend or try to install it with '
+                          'pip (or conda) install nmslib. '
+                          'Original exception: {}'.format(e))
+    n_vertices, _ = features.shape
+    params_index = params.pop('index', None)
+    params_query = params.pop('query', None)
+    metric = 'l2' if metric == 'euclidean' else metric
+    metric = 'l1' if metric == 'manhattan' else metric
+    metric = 'linf' if metric == 'max_dist' else metric
+    index = nms.init(space=metric, **params)
+    index.addDataPointBatch(features)
+    index.createIndex(params_index)
+    if params_query is not None:
+        index.setQueryTimeParams(params_query)
+    results = index.knnQueryBatch(features, k=k+1)
+    neighbors, distances = zip(*results)
+    distances = np.concatenate(distances).reshape(n_vertices, k+1)
+    neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1)
+    return neighbors, distances
+
+
+def nearest_neighbor(features, metric='euclidean', order=2, kind='knn',
+                     k=10, radius=None, backend='scipy-ckdtree', **kwargs):
+    r'''Find nearest neighbors.
+
+    Parameters
+    ----------
+    features : ndarray
+        An array of shape (n_samples, n_dimensions), one row per sample.
+    metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional
+        Metric used to compute pairwise distances.
+
+        * ``'euclidean'`` defines pairwise distances as
+          :math:`d(v_i, v_j) = \| x_i - x_j \|_2`.
+        * ``'manhattan'`` defines pairwise distances as
+          :math:`d(v_i, v_j) = \| x_i - x_j \|_1`.
+        * ``'minkowski'`` generalizes the above and defines distances as
+          :math:`d(v_i, v_j) = \| x_i - x_j \|_p`
+          where :math:`p` is the ``order`` of the norm.
+        * ``'max_dist'`` defines pairwise distances as
+          :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(|x_i - x_j|)`,
+          where the maximum is taken over the elements of the vector.
+
+        More metrics may be supported for some backends.
+        Please refer to the documentation of the chosen backend.
+    kind : 'knn' or 'radius' (default 'knn')
+        Kind of search: k nearest neighbors, or all neighbors within a radius.
+    k : int, optional
+        Number of nearest neighbors if ``kind='knn'`` is selected.
+    radius : float, optional
+        Radius of the search if ``kind='radius'`` is selected.
+    order : float, optional
+        The order of the Minkowski distance for ``metric='minkowski'``.
+    backend : string, optional
+        * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to
+          compute pairwise distances. The method is brute force and computes
+          all distances. That is the slowest method.
+        * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method
+          builds a k-d tree to prune the number of pairwise distances it has
+          to compute. That is an efficient strategy for low-dimensional
+          spaces.
+        * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as
+          ``'scipy-kdtree'`` but with C bindings, which should be faster.
+          That is the default.
+        * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors
+          (FLANN) `_. That method is an approximation.
+        * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB)
+          `_. That method is an approximation. It should be the
+          fastest in high-dimensional spaces.
+
+        You can look at this `benchmark `_ to get an idea of the
+        relative performance of those backends. It's nonetheless wise to run
+        some tests on your own data.
+    '''
+    if kind == 'knn':
+        radius = None
+    elif kind == 'radius':
+        k = None
+    else:
+        raise ValueError('"kind" must be "knn" or "radius"')
+
+    _orders = {
+        'euclidean': 2,
+        'manhattan': 1,
+        'max_dist': np.inf,
+        'minkowski': order,
+    }
+    order = _orders.pop(metric, None)
+    try:
+        function = globals()['_' + backend.replace('-', '_')]
+    except KeyError:
+        raise ValueError('Invalid backend "{}".'.format(backend))
+    neighbors, distances = function(features, metric, order,
+                                    kind, k, radius, kwargs)
+    return neighbors, distances
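+
+# A minimal usage sketch of ``nearest_neighbor`` (above) together with
+# ``sparse_distance_matrix`` (below), on hypothetical random data:
+#
+#     features = np.random.RandomState(42).uniform(size=(100, 2))
+#     neighbors, distances = nearest_neighbor(features, kind='knn', k=5)
+#     W = sparse_distance_matrix(neighbors, distances)
+#
+# ``W`` is then a symmetric sparse matrix of distances between neighbors,
+# ready to be turned into edge weights by a similarity kernel.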
+
+
+def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False,
+                           kind=None):
+    '''Build a sparse distance matrix from nearest neighbors.'''
+    n_edges = [len(n) - 1 for n in neighbors]  # remove distance to self
+    n_vertices = len(n_edges)
+
+    if safe and kind is None:
+        raise ValueError('Please set "kind" to "knn" or "radius" to use '
+                         'the safe mode.')
+
+    if safe and kind == 'radius':
+        n_disconnected = np.sum(np.asarray(n_edges) == 0)
+        if n_disconnected > 0:
+            _logger.warning('{} points (out of {}) have no neighbors. '
+                            'Consider increasing the radius or setting '
+                            'kind=knn.'.format(n_disconnected, n_vertices))
+
+    value = np.empty(sum(n_edges), dtype=float)
+    row = np.empty_like(value, dtype=int)
+    col = np.empty_like(value, dtype=int)
+    start = 0
+    for vertex in range(n_vertices):
+        if safe and kind == 'knn':
+            # All vertices must have the same number of neighbors.
+            assert n_edges[vertex] == n_edges[0]
+        end = start + n_edges[vertex]
+        value[start:end] = distances[vertex][1:]
+        row[start:end] = np.full(n_edges[vertex], vertex)
+        col[start:end] = neighbors[vertex][1:]
+        start = end
+    W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices))
+    if symmetrize:
+        # Enforce symmetry. May have been broken by k-NN. Checking symmetry
+        # with np.abs(W - W.T).sum() is as costly as the symmetrization
+        # itself.
+        W = utils.symmetrize(W, method='fill')
+    return W
\ No newline at end of file
diff --git a/pygsp/filters/approximations.py b/pygsp/filters/approximations.py
index 28b57acd..2c383760 100644
--- a/pygsp/filters/approximations.py
+++ b/pygsp/filters/approximations.py
@@ -121,16 +121,16 @@ def cheby_rect(G, bounds, signal, **kwargs):
     Parameters
     ----------
     G : Graph
-    bounds : array-like
+    bounds : array_like
         The bounds of the pass-band filter
-    signal : array-like
+    signal : array_like
         Signal to filter
     order : int (optional)
         Order of the Chebyshev polynomial (default: 30)
 
     Returns
     -------
-    r : array-like
+    r : array_like
         Result of the filtering
 
     """
diff --git a/pygsp/filters/filter.py b/pygsp/filters/filter.py
index 8fbf44d7..d7d71528 100644
--- a/pygsp/filters/filter.py
+++ b/pygsp/filters/filter.py
@@ -116,7 +116,7 @@ def evaluate(self, x):
 
         Parameters
         ----------
-        x : ndarray
+        x : array_like
            Graph frequencies at which to evaluate the filter.
Returns @@ -138,6 +138,7 @@ def evaluate(self, x): [] """ + x = np.asanyarray(x) # Avoid to copy data as with np.array([g(x) for g in self._kernels]). y = np.empty([self.Nf] + list(x.shape)) for i, kernel in enumerate(self._kernels): @@ -179,7 +180,7 @@ def filter(self, s, method='chebyshev', order=30): Parameters ---------- - s : ndarray + s : array_like Graph signals, a tensor of shape ``(N_NODES, N_SIGNALS, N_FEATURES)``, where ``N_NODES`` is the number of nodes in the graph, ``N_SIGNALS`` the number of independent signals you want to @@ -254,7 +255,7 @@ def filter(self, s, method='chebyshev', order=30): >>> _ = G.plot(s1, ax=axes[0]) >>> _ = G.plot(s2, ax=axes[1]) >>> print('{:.5f}'.format(np.linalg.norm(s1 - s2))) - 0.26808 + 0.26995 Perfect reconstruction with Itersine, a tight frame: @@ -265,9 +266,7 @@ def filter(self, s, method='chebyshev', order=30): True """ - if s.shape[0] != self.G.N: - raise ValueError('First dimension should be the number of nodes ' - 'G.N = {}, got {}.'.format(self.G.N, s.shape)) + s = self.G._check_signal(s) # TODO: not in self.Nin (Nf = Nin x Nout). if s.ndim == 1 or s.shape[-1] not in [1, self.Nf]: @@ -407,16 +406,16 @@ def estimate_frame_bounds(self, x=None): As :math:`g(L) = U g(\Lambda) U^\top` is diagonalized by the Fourier basis :math:`U` with eigenvalues :math:`\Lambda`, :math:`\| g(L) x \|^2 - = \| g(\Lambda) U x \|^2`, and :math:`A = \min g^2(\Lambda)`, + = \| g(\Lambda) U^\top x \|^2`, and :math:`A = \min g^2(\Lambda)`, :math:`B = \max g^2(\Lambda)`. Parameters ---------- - x : ndarray + x : array_like Graph frequencies at which to evaluate the filter bank `g(x)`. - The default is `x = np.linspace(0, G.lmax, 1000)`. + The default is ``x = np.linspace(0, G.lmax, 1000)``. The exact bounds are given by evaluating the filter bank at the - eigenvalues of the graph Laplacian, i.e., `x = G.e`. + eigenvalues of the graph Laplacian, i.e., ``x = G.e``. Returns ------- @@ -425,7 +424,7 @@ def estimate_frame_bounds(self, x=None): B : float Upper frame bound of the filter bank. - See also + See Also -------- compute_frame: compute the frame complement: complement a filter bank to become a tight frame @@ -449,7 +448,7 @@ def estimate_frame_bounds(self, x=None): A=1.708, B=2.359 >>> A, B = g.estimate_frame_bounds(G.e) >>> print('A={:.3f}, B={:.3f}'.format(A, B)) - A=1.723, B=2.359 + A=1.839, B=2.359 The frame bounds can be seen in the plot of the filter bank as the minimum and maximum of their squared sum (the black curve): @@ -499,6 +498,8 @@ def estimate_frame_bounds(self, x=None): """ if x is None: x = np.linspace(0, self.G.lmax, 1000) + else: + x = np.asanyarray(x) sum_filters = np.sum(self.evaluate(x)**2, axis=0) @@ -516,7 +517,7 @@ def compute_frame(self, **kwargs): *analysis operator* .. math:: - g(L) = \begin{pmatrix} g_0(L) \\ \vdots \\ g_F(L) \end{pmatrix} + g(L) = \begin{pmatrix} g_1(L) \\ \vdots \\ g_F(L) \end{pmatrix} \in \mathbb{R}^{NF \times N}, \quad g_i(L) = U g_i(\Lambda) U^\top, @@ -550,7 +551,7 @@ def compute_frame(self, **kwargs): frame : ndarray Array of size (#nodes x #filters) x #nodes. 
-        See also
+        See Also
         --------
         estimate_frame_bounds: estimate the frame bounds
         filter: more efficient way to filter signals
 
@@ -573,6 +574,7 @@
         >>> s1 = gL.dot(s)
         >>> s1 = s1.reshape(G.N, -1, order='F')
         >>>
         >>> s2 = g.filter(s)
         >>> np.all(np.abs(s1 - s2) < 1e-10)
         True
@@ -588,6 +590,10 @@
         >>> gL.shape
         (600, 100)
         >>> np.all(gL.T.dot(gL) - np.identity(G.N) < 1e-10)
         True
 
         """
@@ -618,7 +624,7 @@
         complement: Filter
             The complementary filter.
 
-        See also
+        See Also
         --------
         estimate_frame_bounds: estimate the frame bounds
 
@@ -662,14 +668,21 @@ def kernel(x, *args, **kwargs):
 
     def inverse(self):
         r"""Return the pseudo-inverse filter bank.
 
-        The pseudo-inverse of the filter bank :math:`g` is the filter bank
-        :math:`g^+` such that
+        The pseudo-inverse of the *analysis filter bank* :math:`g` is the
+        *synthesis filter bank* :math:`g^+` such that
 
         .. math:: g(L)^+ g(L) = I,
 
-        where :math:`I` is the identity matrix, and :math:`g(L)^+ = (g(L)\top
-        g(L))^{-1} g(L)^\top` is the left pseudo-inverse of the analysis
-        operator :math:`g(L)`.
+        where :math:`I` is the identity matrix, and the *synthesis operator*
+
+        .. math:: g(L)^+ = (g(L)^\top g(L))^{-1} g(L)^\top
+                         = (g_1(L)^+, \dots, g_F(L)^+)
+                         \in \mathbb{R}^{N \times NF}
+
+        is the left pseudo-inverse of the analysis operator :math:`g(L)`. Note
+        that :math:`g_i(L)^+` is the pseudo-inverse of :math:`g_i(L)`,
+        :math:`N` is the number of vertices, and :math:`F` is the number of
+        filters in the bank.
 
         The above relation holds, and the reconstruction is exact, if and only
         if :math:`g(L)` is a frame. To be a frame, the rows of :math:`g(L)`
@@ -679,19 +692,24 @@
         will be the closest to :math:`x` in the least square sense.
 
         While there exists infinitely many inverses of the analysis operator of
-        a frame, the pseudo-inverse is unique and corresponds to the canonical
-        dual of the filter kernel.
+        a frame, the pseudo-inverse is unique and corresponds to the *canonical
+        dual* of the filter kernel.
+
+        The *frame operator* of :math:`g^+` is :math:`g(L)^+ (g(L)^+)^\top =
+        (g(L)^\top g(L))^{-1}`, the inverse of the frame operator of :math:`g`.
+        Similarly, its *frame bounds* are :math:`A^{-1}` and :math:`B^{-1}`,
+        where :math:`A` and :math:`B` are the frame bounds of :math:`g`.
 
-        If the frame is tight (i.e., :math:`A=B`), the canonical dual filters
-        are given by :math:`h_i = g_i / A`, where :math:`g_i` are the filters
-        composing the filter bank :math:`g`.
+        If :math:`g` is tight (i.e., :math:`A=B`), the canonical dual is given
+        by :math:`g^+ = g / A` (i.e., :math:`g^+_i = g_i / A \ \forall i`).
 
         Returns
         -------
-        inverse: Filter
-            The pseudo-inverse filter bank.
+        inverse : :class:`pygsp.filters.Filter`
+            The pseudo-inverse filter bank, which synthesizes (or
+            reconstructs) a signal from its coefficients using the canonical
+            dual frame.
 
-        See also
+        See Also
         --------
         estimate_frame_bounds: estimate the frame bounds
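To make the synthesis relation above concrete, a hedged sketch of the
intended use of the pseudo-inverse (``g`` is any filter bank on a graph,
``s`` a vertex signal; reconstruction is only exact when ``g(L)`` is a frame
and the evaluation is exact)::

    coefficients = g.filter(s)              # analysis
    s2 = g.inverse().filter(coefficients)   # synthesis with the canonical dual
    error = np.linalg.norm(s - s2)          # small if g(L) is a frame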
diff --git a/pygsp/filters/mexicanhat.py b/pygsp/filters/mexicanhat.py
index 92740aa6..e0c43c56 100644
--- a/pygsp/filters/mexicanhat.py
+++ b/pygsp/filters/mexicanhat.py
@@ -32,7 +32,7 @@ class MexicanHat(Filter):
     lpfactor : float
         Low-pass factor. lmin=lmax/lpfactor will be used to determine scales.
         The scaling function will be created to fill the low-pass gap.
-    scales : array-like
+    scales : array_like
         Scales to be used.
         By default, initialized with :func:`pygsp.utils.compute_log_scales`.
     normalize : bool
diff --git a/pygsp/filters/meyer.py b/pygsp/filters/meyer.py
index 4c03c703..e54f5314 100644
--- a/pygsp/filters/meyer.py
+++ b/pygsp/filters/meyer.py
@@ -61,7 +61,7 @@ def kernel(x, kernel_type):
         * meyer scaling function kernel: supported on [0,4/3]
     """
 
-    x = np.asarray(x)
+    x = np.asanyarray(x)
     l1 = 2/3.
     l2 = 4/3.  # 2*l1
diff --git a/pygsp/filters/modulation.py b/pygsp/filters/modulation.py
index 9483648f..60833850 100644
--- a/pygsp/filters/modulation.py
+++ b/pygsp/filters/modulation.py
@@ -55,9 +55,9 @@ class Modulation(Filter):
     modulation_first : bool
         First modulate then localize the kernel if True, first localize then
         modulate if False. The two operators do not commute. This setting only
-        applies to `filter`, `evaluate` only performs modulation (the filter
-        would otherwise have a different spectrum depending on where it is
-        localized).
+        applies to :meth:`filter`. :meth:`evaluate` only performs modulation,
+        as the filter would otherwise have a different spectrum depending on
+        where it is localized.
 
     See Also
     --------
diff --git a/pygsp/filters/simpletight.py b/pygsp/filters/simpletight.py
index c5aad96b..4e26bf06 100644
--- a/pygsp/filters/simpletight.py
+++ b/pygsp/filters/simpletight.py
@@ -19,7 +19,7 @@ class SimpleTight(Filter):
     G : graph
     Nf : int
         Number of filters to cover the interval [0, lmax].
-    scales : array-like
+    scales : array_like
         Scales to be used. Defaults to log scale.
 
     Examples
diff --git a/pygsp/filters/wave.py b/pygsp/filters/wave.py
index 60137c24..551e1b4e 100644
--- a/pygsp/filters/wave.py
+++ b/pygsp/filters/wave.py
@@ -115,7 +115,7 @@ def __init__(self, G, time=10, speed=1):
             raise ValueError('If both parameters are iterable, '
                              'they should have the same length.')
 
-        if np.any(np.asarray(speed) >= 2):
+        if np.any(np.asanyarray(speed) >= 2):
             raise ValueError('The wave propagation speed should be in [0, 2[')
 
         def kernel(x, time, speed):
diff --git a/pygsp/graphs/__init__.py b/pygsp/graphs/__init__.py
index 90b94ca9..8e1b327b 100644
--- a/pygsp/graphs/__init__.py
+++ b/pygsp/graphs/__init__.py
@@ -75,9 +75,9 @@
 .. autosummary::
 
-    Graph.check_weights
     Graph.is_connected
     Graph.is_directed
+    Graph.has_loops
 
 Plotting
 --------
@@ -87,12 +87,40 @@
     Graph.plot
     Graph.plot_spectrogram
 
+Import and export (I/O)
+-----------------------
+
+We provide import and export facilities to two well-known Python packages for
+network analysis: NetworkX_ and graph-tool_.
+Those packages and the PyGSP are fundamentally different in their goals (graph
+analysis versus graph signal analysis) and graph representations (if in the
+PyGSP everything is an ndarray, in NetworkX everything is a dictionary).
+Those tools are complementary and good interoperability is necessary to
+exploit the strengths of each tool.
+We ourselves leverage NetworkX and graph-tool to save and load graphs.
+
+Note: to tie a signal with the graph, such that they are exported together,
+attach it first with :meth:`Graph.set_signal`.
+
+.. _NetworkX: https://networkx.github.io
+.. _graph-tool: https://graph-tool.skewed.de
+
+.. autosummary::
+
+    Graph.load
+    Graph.save
+    Graph.from_networkx
+    Graph.to_networkx
+    Graph.from_graphtool
+    Graph.to_graphtool
+
 Others
 ------
 
 ..
autosummary:: Graph.get_edge_list + Graph.set_signal Graph.set_coordinates Graph.subgraph Graph.extract_components @@ -100,6 +128,15 @@ Graph models ============ +In addition to the below graphs, useful resources are the random graph +generators from NetworkX (see `NetworkX's documentation`_) and graph-tool (see +:mod:`graph_tool.generation`), as well as graph-tool's assortment of standard +networks (see :mod:`graph_tool.collection`). +Any graph created by NetworkX or graph-tool can be imported in the PyGSP with +:meth:`Graph.from_networkx` and :meth:`Graph.from_graphtool`. + +.. _NetworkX's documentation: https://networkx.github.io/documentation/stable/reference/generators.html + .. autosummary:: Airfoil @@ -156,6 +193,7 @@ 'RandomRegular', 'RandomRing', 'Ring', + 'SphereEquiangular', 'StochasticBlockModel', 'SwissRoll', 'Torus' @@ -168,6 +206,8 @@ 'Grid2dImgPatches', 'Sensor', 'Sphere', + 'SphereHealpix', + 'SphereIcosahedron', 'TwoMoons' ] diff --git a/pygsp/graphs/_io.py b/pygsp/graphs/_io.py new file mode 100644 index 00000000..3ac774c1 --- /dev/null +++ b/pygsp/graphs/_io.py @@ -0,0 +1,593 @@ +# -*- coding: utf-8 -*- + +import os + +import numpy as np + + +def _import_networkx(): + try: + import networkx as nx + except Exception as e: + raise ImportError('Cannot import networkx. Use graph-tool or try to ' + 'install it with pip (or conda) install networkx. ' + 'Original exception: {}'.format(e)) + return nx + + +def _import_graphtool(): + try: + import graph_tool as gt + except Exception as e: + raise ImportError('Cannot import graph-tool. Use networkx or try to ' + 'install it. Original exception: {}'.format(e)) + return gt + + +class IOMixIn(object): + + def _break_signals(self): + r"""Break N-dimensional signals into N 1D signals.""" + for name in list(self.signals.keys()): + if self.signals[name].ndim == 2: + for i, signal_1d in enumerate(self.signals[name].T): + self.signals[name + '_' + str(i)] = signal_1d + del self.signals[name] + + def _join_signals(self): + r"""Join N 1D signals into one N-dimensional signal.""" + joined = dict() + for name in self.signals: + name_base = name.rsplit('_', 1)[0] + names = joined.get(name_base, list()) + names.append(name) + joined[name_base] = names + for name_base, names in joined.items(): + if len(names) > 1: + names = sorted(names) # ensure dim ordering (_0, _1, etc.) + signal_nd = np.stack([self.signals[n] for n in names], axis=1) + self.signals[name_base] = signal_nd + for name in names: + del self.signals[name] + + def to_networkx(self): + r"""Export the graph to NetworkX. + + Edge weights are stored as an edge attribute, + under the name "weight". + + Signals are stored as node attributes, + under their name in the :attr:`signals` dictionary. + `N`-dimensional signals are broken into `N` 1-dimensional signals. + They will eventually be joined back together on import. + + Returns + ------- + graph : :class:`networkx.Graph` + A NetworkX graph object. 
+
+        See Also
+        --------
+        to_graphtool : export to graph-tool
+        save : save to a file
+
+        Examples
+        --------
+        >>> import networkx as nx
+        >>> from matplotlib import pyplot as plt
+        >>> graph = graphs.Path(4, directed=True)
+        >>> graph.set_signal(np.full(4, 2.3), 'signal')
+        >>> graph = graph.to_networkx()
+        >>> print(nx.info(graph))
+        Name: Path
+        Type: DiGraph
+        Number of nodes: 4
+        Number of edges: 3
+        Average in degree: 0.7500
+        Average out degree: 0.7500
+        >>> nx.is_directed(graph)
+        True
+        >>> graph.nodes()
+        NodeView((0, 1, 2, 3))
+        >>> graph.edges()
+        OutEdgeView([(0, 1), (1, 2), (2, 3)])
+        >>> graph.nodes()[2]
+        {'signal': 2.3}
+        >>> graph.edges()[(0, 1)]
+        {'weight': 1.0}
+        >>> # nx.draw(graph, with_labels=True)
+
+        Another common goal is to use NetworkX to compute some properties to
+        be imported back in the PyGSP as signals.
+
+        >>> import networkx as nx
+        >>> from matplotlib import pyplot as plt
+        >>> graph = graphs.Sensor(100, seed=42)
+        >>> graph.set_signal(graph.coords, 'coords')
+        >>> graph = graph.to_networkx()
+        >>> betweenness = nx.betweenness_centrality(graph, weight='weight')
+        >>> nx.set_node_attributes(graph, betweenness, 'betweenness')
+        >>> graph = graphs.Graph.from_networkx(graph)
+        >>> graph.compute_fourier_basis()
+        >>> graph.set_coordinates(graph.signals['coords'])
+        >>> fig, axes = plt.subplots(1, 2)
+        >>> _ = graph.plot(graph.signals['betweenness'], ax=axes[0])
+        >>> _ = axes[1].plot(graph.e, graph.gft(graph.signals['betweenness']))
+
+        """
+        nx = _import_networkx()
+
+        def convert(number):
+            # NetworkX accepts arbitrary python objects as attributes, but:
+            # * the GEXF writer does not accept any NumPy types (on signals),
+            # * the GraphML writer does not accept NumPy ints.
+            if issubclass(number.dtype.type, (np.integer, np.bool_)):
+                return int(number)
+            else:
+                return float(number)
+
+        def edges():
+            for source, target, weight in zip(*self.get_edge_list()):
+                yield int(source), int(target), {'weight': convert(weight)}
+
+        def nodes():
+            for vertex in range(self.n_vertices):
+                signals = {name: convert(signal[vertex])
+                           for name, signal in self.signals.items()}
+                yield vertex, signals
+
+        self._break_signals()
+        graph = nx.DiGraph() if self.is_directed() else nx.Graph()
+        graph.add_nodes_from(nodes())
+        graph.add_edges_from(edges())
+        graph.name = self.__class__.__name__
+        return graph
+
+    def to_graphtool(self):
+        r"""Export the graph to graph-tool.
+
+        Edge weights are stored as an edge property map,
+        under the name "weight".
+
+        Signals are stored as vertex property maps,
+        under their name in the :attr:`signals` dictionary.
+        `N`-dimensional signals are broken into `N` 1-dimensional signals.
+        They will eventually be joined back together on import.
+
+        Returns
+        -------
+        graph : :class:`graph_tool.Graph`
+            A graph-tool graph object.
+
+        See Also
+        --------
+        to_networkx : export to NetworkX
+        save : save to a file
+
+        Examples
+        --------
+        >>> import graph_tool as gt
+        >>> import graph_tool.draw
+        >>> from matplotlib import pyplot as plt
+        >>> graph = graphs.Path(4, directed=True)
+        >>> graph.set_signal(np.full(4, 2.3), 'signal')
+        >>> graph = graph.to_graphtool()
+        >>> graph.is_directed()
+        True
+        >>> graph.vertex_properties['signal'][2]
+        2.3
+        >>> graph.edge_properties['weight'][(0, 1)]
+        1.0
+        >>> # gt.draw.graph_draw(graph, vertex_text=graph.vertex_index)
+
+        Another common goal is to use graph-tool to compute some properties to
+        be imported back in the PyGSP as signals.
+ + >>> import graph_tool as gt + >>> import graph_tool.centrality + >>> from matplotlib import pyplot as plt + >>> graph = graphs.Sensor(100, seed=42) + >>> graph.set_signal(graph.coords, 'coords') + >>> graph = graph.to_graphtool() + >>> vprop, eprop = gt.centrality.betweenness( + ... graph, weight=graph.edge_properties['weight']) + >>> graph.vertex_properties['betweenness'] = vprop + >>> graph = graphs.Graph.from_graphtool(graph) + >>> graph.compute_fourier_basis() + >>> graph.set_coordinates(graph.signals['coords']) + >>> fig, axes = plt.subplots(1, 2) + >>> _ = graph.plot(graph.signals['betweenness'], ax=axes[0]) + >>> _ = axes[1].plot(graph.e, graph.gft(graph.signals['betweenness'])) + + """ + + # See gt.value_types() for the list of accepted types. + # See the definition of _type_alias() for a list of aliases. + # Mapping from https://docs.scipy.org/doc/numpy/user/basics.types.html. + convert = { + np.bool_: 'bool', + np.int8: 'int8_t', + np.int16: 'int16_t', + np.int32: 'int32_t', + np.int64: 'int64_t', + np.short: 'short', + np.intc: 'int', + np.uintc: 'unsigned int', + np.long: 'long', + np.longlong: 'long long', + np.uint: 'unsigned long', + np.single: 'float', + np.double: 'double', + np.longdouble: 'long double', + } + + gt = _import_graphtool() + graph = gt.Graph(directed=self.is_directed()) + + sources, targets, weights = self.get_edge_list() + graph.add_edge_list(np.asarray((sources, targets)).T) + try: + dtype = convert[weights.dtype.type] + except KeyError: + raise TypeError("Type {} of the edge weights is not supported." + .format(weights.dtype)) + prop = graph.new_edge_property(dtype) + prop.get_array()[:] = weights + graph.edge_properties['weight'] = prop + + self._break_signals() + for name, signal in self.signals.items(): + try: + dtype = convert[signal.dtype.type] + except KeyError: + raise TypeError("Type {} of signal {} is not supported." + .format(signal.dtype, name)) + prop = graph.new_vertex_property(dtype) + prop.get_array()[:] = signal + graph.vertex_properties[name] = prop + + return graph + + @classmethod + def from_networkx(cls, graph, weight='weight'): + r"""Import a graph from NetworkX. + + Edge weights are retrieved as an edge attribute, + under the name specified by the ``weight`` parameter. + + Signals are retrieved from node attributes, + and stored in the :attr:`signals` dictionary under the attribute name. + `N`-dimensional signals that were broken during export are joined. + + Parameters + ---------- + graph : :class:`networkx.Graph` + A NetworkX graph object. + weight : string or None, optional + The edge attribute that holds the numerical values used as the edge + weights. All edge weights are set to 1 if None, or not found. + + Returns + ------- + graph : :class:`~pygsp.graphs.Graph` + A PyGSP graph object. + + Notes + ----- + + The nodes are ordered according to :meth:`networkx.Graph.nodes`. + + In NetworkX, node attributes need not be set for every node. + If a node attribute is not set for a node, a NaN is assigned to the + corresponding signal for that node. + + If the graph is a :class:`networkx.MultiGraph`, multiedges are + aggregated by summation. 
+
+        See Also
+        --------
+        from_graphtool : import from graph-tool
+        load : load from a file
+
+        Examples
+        --------
+        >>> import networkx as nx
+        >>> graph = nx.Graph()
+        >>> graph.add_edge(1, 2, weight=0.2)
+        >>> graph.add_edge(2, 3, weight=0.9)
+        >>> graph.add_node(4, sig=3.1416)
+        >>> graph.nodes()
+        NodeView((1, 2, 3, 4))
+        >>> graph = graphs.Graph.from_networkx(graph)
+        >>> graph.W.toarray()
+        array([[0. , 0.2, 0. , 0. ],
+               [0.2, 0. , 0.9, 0. ],
+               [0. , 0.9, 0. , 0. ],
+               [0. , 0. , 0. , 0. ]])
+        >>> graph.signals
+        {'sig': array([ nan, nan, nan, 3.1416])}
+
+        """
+        nx = _import_networkx()
+        from .graph import Graph
+
+        adjacency = nx.to_scipy_sparse_matrix(graph, weight=weight)
+        graph_pg = Graph(adjacency)
+
+        for i, node in enumerate(graph.nodes()):
+            for name in graph.nodes[node].keys():
+                try:
+                    signal = graph_pg.signals[name]
+                except KeyError:
+                    signal = np.full(graph_pg.n_vertices, np.nan)
+                    graph_pg.set_signal(signal, name)
+                try:
+                    signal[i] = graph.nodes[node][name]
+                except KeyError:
+                    pass  # attribute not set for node
+
+        graph_pg._join_signals()
+        return graph_pg
+
+    @classmethod
+    def from_graphtool(cls, graph, weight='weight'):
+        r"""Import a graph from graph-tool.
+
+        Edge weights are retrieved as an edge property,
+        under the name specified by the ``weight`` parameter.
+
+        Signals are retrieved from node properties,
+        and stored in the :attr:`signals` dictionary under the property name.
+        `N`-dimensional signals that were broken during export are joined.
+
+        Parameters
+        ----------
+        graph : :class:`graph_tool.Graph`
+            A graph-tool graph object.
+        weight : string
+            The edge property that holds the numerical values used as the edge
+            weights. All edge weights are set to 1 if None, or not found.
+
+        Returns
+        -------
+        graph : :class:`~pygsp.graphs.Graph`
+            A PyGSP graph object.
+
+        Notes
+        -----
+
+        If the graph has multiple edges connecting the same two nodes, a sum
+        over the edges is taken to merge them.
+
+        See Also
+        --------
+        from_networkx : import from NetworkX
+        load : load from a file
+
+        Examples
+        --------
+        >>> import graph_tool as gt
+        >>> graph = gt.Graph(directed=False)
+        >>> e1 = graph.add_edge(0, 1)
+        >>> e2 = graph.add_edge(1, 2)
+        >>> v = graph.add_vertex()
+        >>> eprop = graph.new_edge_property("double")
+        >>> eprop[e1] = 0.2
+        >>> eprop[(1, 2)] = 0.9
+        >>> graph.edge_properties["weight"] = eprop
+        >>> vprop = graph.new_vertex_property("double", val=np.nan)
+        >>> vprop[3] = 3.1416
+        >>> graph.vertex_properties["sig"] = vprop
+        >>> graph = graphs.Graph.from_graphtool(graph)
+        >>> graph.W.toarray()
+        array([[0. , 0.2, 0. , 0. ],
+               [0.2, 0. , 0.9, 0. ],
+               [0. , 0.9, 0. , 0. ],
+               [0. , 0. , 0. , 0. ]])
+        >>> graph.signals
+        {'sig': PropertyArray([ nan, nan, nan, 3.1416])}
+
+        """
+        gt = _import_graphtool()
+        import graph_tool.spectral
+        from .graph import Graph
+
+        weight = graph.edge_properties.get(weight, None)
+        adjacency = gt.spectral.adjacency(graph, weight=weight)
+        graph_pg = Graph(adjacency.T)
+
+        for name, signal in graph.vertex_properties.items():
+            graph_pg.set_signal(signal.get_array(), name)
+
+        graph_pg._join_signals()
+        return graph_pg
+
+    @classmethod
+    def load(cls, path, fmt=None, backend=None):
+        r"""Load a graph from a file.
+
+        Edge weights are retrieved as an edge attribute named "weight".
+
+        Signals are retrieved from node attributes,
+        and stored in the :attr:`signals` dictionary under the attribute name.
+        `N`-dimensional signals that were broken during export are joined.
+ + Parameters + ---------- + path : string + Path to the file from which to load the graph. + fmt : {'graphml', 'gml', 'gexf', None}, optional + Format in which the graph is saved. + Guessed from the filename extension if None. + backend : {'networkx', 'graph-tool', None}, optional + Library used to load the graph. Automatically chosen if None. + + Returns + ------- + graph : :class:`Graph` + The loaded graph. + + See Also + -------- + save : save a graph to a file + from_networkx : load with NetworkX then import in the PyGSP + from_graphtool : load with graph-tool then import in the PyGSP + + Notes + ----- + + A lossless round-trip is only guaranteed if the graph (and its signals) + is saved and loaded with the same backend. + + Loading from other formats is possible by loading in NetworkX or + graph-tool, and importing to the PyGSP. + The proposed formats are however tested for faithful round-trips. + + Examples + -------- + >>> graph = graphs.Logo() + >>> graph.save('logo.graphml') + >>> graph = graphs.Graph.load('logo.graphml') + >>> import os + >>> os.remove('logo.graphml') + + """ + + if fmt is None: + fmt = os.path.splitext(path)[1][1:] + if fmt not in ['graphml', 'gml', 'gexf']: + raise ValueError('Unsupported format {}.'.format(fmt)) + + def load_networkx(path, fmt): + nx = _import_networkx() + load = getattr(nx, 'read_' + fmt) + graph = load(path) + return cls.from_networkx(graph) + + def load_graphtool(path, fmt): + gt = _import_graphtool() + graph = gt.load_graph(path, fmt=fmt) + return cls.from_graphtool(graph) + + if backend == 'networkx': + return load_networkx(path, fmt) + elif backend == 'graph-tool': + return load_graphtool(path, fmt) + elif backend is None: + try: + return load_networkx(path, fmt) + except ImportError: + try: + return load_graphtool(path, fmt) + except ImportError: + raise ImportError('Cannot import networkx nor graph-tool.') + else: + raise ValueError('Unknown backend {}.'.format(backend)) + + def save(self, path, fmt=None, backend=None): + r"""Save the graph to a file. + + Edge weights are stored as an edge attribute, + under the name "weight". + + Signals are stored as node attributes, + under their name in the :attr:`signals` dictionary. + `N`-dimensional signals are broken into `N` 1-dimensional signals. + They will eventually be joined back together on import. + + Supported formats are: + + * GraphML_, a comprehensive XML format. + `Wikipedia `_. + Supported by NetworkX_, graph-tool_, NetworKit_, igraph_, Gephi_, + Cytoscape_, SocNetV_. + * GML_ (Graph Modelling Language), a simple non-XML format. + `Wikipedia `_. + Supported by NetworkX_, graph-tool_, NetworKit_, igraph_, Gephi_, + Cytoscape_, SocNetV_, Tulip_. + * GEXF_ (Graph Exchange XML Format), Gephi's XML format. + Supported by NetworkX_, NetworKit_, Gephi_, Tulip_, ngraph_. + + If unsure, we recommend GraphML_. + + .. _GraphML: http://graphml.graphdrawing.org + .. _GML: http://www.infosun.fim.uni-passau.de/Graphlet/GML/gml-tr.html + .. _GEXF: https://gephi.org/gexf/format + .. _NetworkX: https://networkx.github.io + .. _graph-tool: https://graph-tool.skewed.de + .. _NetworKit: https://networkit.github.io + .. _igraph: https://igraph.org + .. _ngraph: https://github.com/anvaka/ngraph + .. _Gephi: https://gephi.org + .. _Cytoscape: https://cytoscape.org + .. _SocNetV: https://socnetv.org + .. _Tulip: http://tulip.labri.fr + + Parameters + ---------- + path : string + Path to the file where the graph is to be saved. 
+        fmt : {'graphml', 'gml', 'gexf', None}, optional
+            Format in which to save the graph.
+            Guessed from the filename extension if None.
+        backend : {'networkx', 'graph-tool', None}, optional
+            Library used to save the graph. Automatically chosen if None.
+
+        See Also
+        --------
+        load : load a graph from a file
+        to_networkx : export as a NetworkX graph, and save with NetworkX
+        to_graphtool : export as a graph-tool graph, and save with graph-tool
+
+        Notes
+        -----
+
+        A lossless round-trip is only guaranteed if the graph (and its signals)
+        is saved and loaded with the same backend.
+
+        Saving in other formats is possible by exporting to NetworkX or
+        graph-tool, and using their respective saving functionality.
+        The proposed formats are however tested for faithful round-trips.
+
+        Edge weights and signal values are rounded at the sixth decimal when
+        saving in ``fmt='gml'`` with ``backend='graph-tool'``.
+
+        Examples
+        --------
+        >>> graph = graphs.Logo()
+        >>> graph.save('logo.graphml')
+        >>> graph = graphs.Graph.load('logo.graphml')
+        >>> import os
+        >>> os.remove('logo.graphml')
+
+        """
+
+        if fmt is None:
+            fmt = os.path.splitext(path)[1][1:]
+        if fmt not in ['graphml', 'gml', 'gexf']:
+            raise ValueError('Unsupported format {}.'.format(fmt))
+
+        def save_networkx(graph, path, fmt):
+            nx = _import_networkx()
+            graph = graph.to_networkx()
+            save = getattr(nx, 'write_' + fmt)
+            save(graph, path)
+
+        def save_graphtool(graph, path, fmt):
+            graph = graph.to_graphtool()
+            graph.save(path, fmt=fmt)
+
+        if backend == 'networkx':
+            save_networkx(self, path, fmt)
+        elif backend == 'graph-tool':
+            save_graphtool(self, path, fmt)
+        elif backend is None:
+            try:
+                save_networkx(self, path, fmt)
+            except ImportError:
+                try:
+                    save_graphtool(self, path, fmt)
+                except ImportError:
+                    raise ImportError('Cannot import networkx nor graph-tool.')
+        else:
+            raise ValueError('Unknown backend {}.'.format(backend))
diff --git a/pygsp/graphs/_layout.py b/pygsp/graphs/_layout.py
new file mode 100644
index 00000000..31127fd1
--- /dev/null
+++ b/pygsp/graphs/_layout.py
@@ -0,0 +1,203 @@
+# -*- coding: utf-8 -*-
+
+from collections import Counter
+
+import numpy as np
+from scipy import sparse
+
+
+class LayoutMixIn(object):
+
+    def set_coordinates(self, kind='spring', **kwargs):
+        r"""Set node's coordinates (their position when plotting).
+
+        Parameters
+        ----------
+        kind : string or array_like
+            Kind of coordinates to generate. It controls the position of the
+            nodes when plotting the graph. Can either pass an array of size Nx2
+            or Nx3 to set the coordinates manually or the name of a layout
+            algorithm. Available algorithms: community2D, random2D, random3D,
+            ring2D, line1D, line2D, spring, laplacian_eigenmap2D,
+            laplacian_eigenmap3D. Default is 'spring'.
+        kwargs : dict
+            Additional parameters to be passed to the Fruchterman-Reingold
+            force-directed algorithm when kind is spring.
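+            Accepted keys include ``dim``, ``k``, ``pos``, ``fixed``,
+            ``iterations``, ``scale``, ``center``, and ``seed`` (see the
+            signature of ``_fruchterman_reingold_layout``).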
+
+        Examples
+        --------
+        >>> G = graphs.ErdosRenyi()
+        >>> G.set_coordinates()
+        >>> fig, ax = G.plot()
+
+        """
+
+        if not isinstance(kind, str):
+            coords = np.asanyarray(kind).squeeze()
+            check_1d = (coords.ndim == 1)
+            check_2d_3d = (coords.ndim == 2) and (2 <= coords.shape[1] <= 3)
+            if coords.shape[0] != self.N or not (check_1d or check_2d_3d):
+                raise ValueError('Expecting coordinates to be of size N, Nx2, '
+                                 'or Nx3.')
+            self.coords = coords
+
+        elif kind == 'line1D':
+            self.coords = np.arange(self.N)
+
+        elif kind == 'line2D':
+            x, y = np.arange(self.N), np.zeros(self.N)
+            self.coords = np.stack([x, y], axis=1)
+
+        elif kind == 'ring2D':
+            angle = np.arange(self.N) * 2 * np.pi / self.N
+            self.coords = np.stack([np.cos(angle), np.sin(angle)], axis=1)
+
+        elif kind == 'random2D':
+            self.coords = np.random.uniform(size=(self.N, 2))
+
+        elif kind == 'random3D':
+            self.coords = np.random.uniform(size=(self.N, 3))
+
+        elif kind == 'spring':
+            self.coords = self._fruchterman_reingold_layout(**kwargs)
+
+        elif kind == 'community2D':
+            if not hasattr(self, 'info') or 'node_com' not in self.info:
+                raise ValueError('Missing arguments to the graph to be able '
+                                 'to compute community coordinates.')
+
+            if 'world_rad' not in self.info:
+                self.info['world_rad'] = np.sqrt(self.N)
+
+            if 'comm_sizes' not in self.info:
+                counts = Counter(self.info['node_com'])
+                self.info['comm_sizes'] = np.array([cnt[1] for cnt
+                                                    in sorted(counts.items())])
+
+            Nc = self.info['comm_sizes'].shape[0]
+
+            self.info['com_coords'] = self.info['world_rad'] * \
+                np.array(list(zip(
+                    np.cos(2 * np.pi * np.arange(1, Nc + 1) / Nc),
+                    np.sin(2 * np.pi * np.arange(1, Nc + 1) / Nc))))
+
+            # Coordinates of the nodes inside their communities
+            coords = np.random.rand(self.N, 2)
+            self.coords = np.array([[elem[0] * np.cos(2 * np.pi * elem[1]),
+                                     elem[0] * np.sin(2 * np.pi * elem[1])]
+                                    for elem in coords])
+
+            for i in range(self.N):
+                # Set coordinates as an offset from the center of the community
+                # it belongs to
+                comm_idx = self.info['node_com'][i]
+                comm_rad = np.sqrt(self.info['comm_sizes'][comm_idx])
+                self.coords[i] = self.info['com_coords'][comm_idx] + \
+                    comm_rad * self.coords[i]
+        elif kind == 'laplacian_eigenmap2D':
+            self.compute_fourier_basis(n_eigenvectors=3)
+            self.coords = self.U[:, 1:3]
+        elif kind == 'laplacian_eigenmap3D':
+            self.compute_fourier_basis(n_eigenvectors=4)
+            self.coords = self.U[:, 1:4]
+        else:
+            raise ValueError('Unexpected argument kind={}.'.format(kind))
+
+    def _fruchterman_reingold_layout(self, dim=2, k=None, pos=None, fixed=[],
+                                     iterations=50, scale=1.0, center=None,
+                                     seed=None):
+        # TODO doc
+        # fixed: list of nodes with fixed coordinates
+        # Position nodes using Fruchterman-Reingold force-directed algorithm.
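+        # Sketch of the scheme (implemented by _sparse_fruchterman_reingold
+        # below): each iteration applies a repulsive force of magnitude
+        # k**2 / d between every pair of nodes at distance d, an attractive
+        # force of magnitude w * d**2 / k between adjacent nodes with edge
+        # weight w, then moves every free node along its net displacement
+        # with a step size (temperature) that cools down linearly over the
+        # iterations.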
+ + if center is None: + center = np.zeros((1, dim)) + + if np.shape(center)[1] != dim: + self.logger.error('Spring coordinates: center has wrong size.') + center = np.zeros((1, dim)) + + if pos is None: + dom_size = 1 + pos_arr = None + else: + # Determine size of existing domain to adjust initial positions + dom_size = np.max(pos) + pos_arr = np.random.RandomState(seed).uniform(size=(self.N, dim)) + pos_arr = pos_arr * dom_size + center + for i in range(self.N): + pos_arr[i] = np.asanyarray(pos[i]) + + if k is None and len(fixed) > 0: + # We must adjust k by domain size for layouts that are not near 1x1 + k = dom_size / np.sqrt(self.N) + + pos = _sparse_fruchterman_reingold(self.A, dim, k, pos_arr, + fixed, iterations, seed) + + if len(fixed) == 0: + pos = _rescale_layout(pos, scale=scale) + center + + return pos + + +def _sparse_fruchterman_reingold(A, dim, k, pos, fixed, iterations, seed): + # Position nodes in adjacency matrix A using Fruchterman-Reingold + nnodes = A.shape[0] + + # make sure we have a LIst of Lists representation + try: + A = A.tolil() + except Exception: + A = (sparse.coo_matrix(A)).tolil() + + if pos is None: + # random initial positions + pos = np.random.RandomState(seed).uniform(size=(nnodes, dim)) + + # optimal distance between nodes + if k is None: + k = np.sqrt(1.0 / nnodes) + + # simple cooling scheme. + # linearly step down by dt on each iteration so last iteration is size dt. + t = 0.1 + dt = t / float(iterations + 1) + + displacement = np.zeros((dim, nnodes)) + for iteration in range(iterations): + displacement *= 0 + # loop over rows + for i in range(nnodes): + if i in fixed: + continue + # difference between this row's node position and all others + delta = (pos[i] - pos).T + # distance between points + distance = np.sqrt((delta**2).sum(axis=0)) + # enforce minimum distance of 0.01 + distance = np.where(distance < 0.01, 0.01, distance) + # the adjacency matrix row + Ai = A[i, :].toarray() + # displacement "force" + displacement[:, i] += \ + (delta * (k * k / distance**2 - Ai * distance / k)).sum(axis=1) + # update positions + length = np.sqrt((displacement**2).sum(axis=0)) + length = np.where(length < 0.01, 0.1, length) + pos += (displacement * t / length).T + # cool temperature + t -= dt + + return pos + + +def _rescale_layout(pos, scale=1): + # rescale to (-scale, scale) in all axes + + # shift origin to (0,0) + lim = 0 # max coordinate for all axes + for i in range(pos.shape[1]): + pos[:, i] -= pos[:, i].mean() + lim = max(pos[:, i].max(), lim) + # rescale to (-scale,scale) in all directions, preserves aspect + for i in range(pos.shape[1]): + pos[:, i] *= scale / lim + return pos diff --git a/pygsp/graphs/airfoil.py b/pygsp/graphs/airfoil.py index 538da2be..0bbc8140 100644 --- a/pygsp/graphs/airfoil.py +++ b/pygsp/graphs/airfoil.py @@ -34,5 +34,5 @@ def __init__(self, **kwargs): "limits": np.array([-1e-4, 1.01*data['x'].max(), -1e-4, 1.01*data['y'].max()])} - super(Airfoil, self).__init__(W=W, coords=coords, plotting=plotting, + super(Airfoil, self).__init__(W, coords=coords, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/barabasialbert.py b/pygsp/graphs/barabasialbert.py index 1a70a2a9..0f2a11bb 100644 --- a/pygsp/graphs/barabasialbert.py +++ b/pygsp/graphs/barabasialbert.py @@ -63,7 +63,7 @@ def __init__(self, N=1000, m0=1, m=1, seed=None, **kwargs): W[elem, i] = 1 W[i, elem] = 1 - super(BarabasiAlbert, self).__init__(W=W, **kwargs) + super(BarabasiAlbert, self).__init__(W, **kwargs) def _get_extra_repr(self): return dict(m0=self.m0, 
m=self.m, seed=self.seed) diff --git a/pygsp/graphs/comet.py b/pygsp/graphs/comet.py index 045565ca..b716b1d1 100644 --- a/pygsp/graphs/comet.py +++ b/pygsp/graphs/comet.py @@ -59,7 +59,7 @@ def __init__(self, N=32, k=12, **kwargs): np.min(tmpcoords[:, 1]), np.max(tmpcoords[:, 1])])} - super(Comet, self).__init__(W=W, coords=tmpcoords, + super(Comet, self).__init__(W, coords=tmpcoords, plotting=plotting, **kwargs) def _get_extra_repr(self): diff --git a/pygsp/graphs/community.py b/pygsp/graphs/community.py index 2d5271dc..b10cf4d4 100644 --- a/pygsp/graphs/community.py +++ b/pygsp/graphs/community.py @@ -236,7 +236,7 @@ def __init__(self, for key, value in {'Nc': Nc, 'info': info}.items(): setattr(self, key, value) - super(Community, self).__init__(W=W, coords=coords, **kwargs) + super(Community, self).__init__(W, coords=coords, **kwargs) def _get_extra_repr(self): attrs = {'Nc': self.Nc, diff --git a/pygsp/graphs/davidsensornet.py b/pygsp/graphs/davidsensornet.py index e6c2b1d9..84a3d9c9 100644 --- a/pygsp/graphs/davidsensornet.py +++ b/pygsp/graphs/davidsensornet.py @@ -57,7 +57,7 @@ def __init__(self, N=64, seed=None, **kwargs): plotting = {"limits": [0, 1, 0, 1]} - super(DavidSensorNet, self).__init__(W=W, coords=coords, + super(DavidSensorNet, self).__init__(W, coords=coords, plotting=plotting, **kwargs) def _get_extra_repr(self): diff --git a/pygsp/graphs/difference.py b/pygsp/graphs/difference.py index 40fba222..7bd54ef7 100644 --- a/pygsp/graphs/difference.py +++ b/pygsp/graphs/difference.py @@ -11,7 +11,7 @@ logger = utils.build_logger(__name__) -class GraphDifference(object): +class DifferenceMixIn(object): @property def D(self): @@ -19,7 +19,7 @@ def D(self): Is computed by :func:`compute_differential_operator`. """ - if not hasattr(self, '_D'): + if self._D is None: self.logger.warning('The differential operator G.D is not ' 'available, we need to compute it. Explicitly ' 'call G.compute_differential_operator() ' @@ -84,7 +84,7 @@ def compute_differential_operator(self): The result is cached and accessible by the :attr:`D` property. - See also + See Also -------- grad : compute the gradient div : compute the divergence @@ -95,12 +95,11 @@ def compute_differential_operator(self): The difference operator is an incidence matrix. Example with a undirected graph. - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 2, 0], ... [2, 0, 1], ... [0, 1, 0], ... ]) - >>> graph = graphs.Graph(adjacency) >>> graph.compute_laplacian('combinatorial') >>> graph.compute_differential_operator() >>> graph.D.toarray() @@ -116,12 +115,11 @@ def compute_differential_operator(self): Example with a directed graph. - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 2, 0], ... [2, 0, 1], ... [0, 0, 0], ... ]) - >>> graph = graphs.Graph(adjacency) >>> graph.compute_laplacian('combinatorial') >>> graph.compute_differential_operator() >>> graph.D.toarray() @@ -168,7 +166,7 @@ def compute_differential_operator(self): self._D = sparse.csc_matrix((values, (rows, columns)), shape=(self.n_vertices, self.n_edges)) - self.D.eliminate_zeros() # Self-loops introduce stored zeros. + self._D.eliminate_zeros() # Self-loops introduce stored zeros. def grad(self, x): r"""Compute the gradient of a signal defined on the vertices. @@ -199,7 +197,7 @@ def grad(self, x): Parameters ---------- - x : ndarray + x : array_like Signal of length :attr:`n_vertices` living on the vertices. 
Returns @@ -207,7 +205,7 @@ def grad(self, x): y : ndarray Gradient signal of length :attr:`n_edges` living on the edges. - See also + See Also -------- compute_differential_operator div : compute the divergence of an edge signal @@ -215,29 +213,37 @@ def grad(self, x): Examples -------- + + Non-directed graph and combinatorial Laplacian: + >>> graph = graphs.Path(4, directed=False, lap_type='combinatorial') >>> graph.compute_differential_operator() - >>> graph.grad(np.array([0, 2, 4, 2])) + >>> graph.grad([0, 2, 4, 2]) array([ 2., 2., -2.]) + Directed graph and combinatorial Laplacian: + >>> graph = graphs.Path(4, directed=True, lap_type='combinatorial') >>> graph.compute_differential_operator() - >>> graph.grad(np.array([0, 2, 4, 2])) + >>> graph.grad([0, 2, 4, 2]) array([ 1.41421356, 1.41421356, -1.41421356]) + Non-directed graph and normalized Laplacian: + >>> graph = graphs.Path(4, directed=False, lap_type='normalized') >>> graph.compute_differential_operator() - >>> graph.grad(np.array([0, 2, 4, 2])) + >>> graph.grad([0, 2, 4, 2]) array([ 1.41421356, 1.41421356, -0.82842712]) + Directed graph and normalized Laplacian: + >>> graph = graphs.Path(4, directed=True, lap_type='normalized') >>> graph.compute_differential_operator() - >>> graph.grad(np.array([0, 2, 4, 2])) + >>> graph.grad([0, 2, 4, 2]) array([ 1.41421356, 1.41421356, -0.82842712]) """ - if self.N != x.shape[0]: - raise ValueError('Signal length should be the number of nodes.') + x = self._check_signal(x) return self.D.T.dot(x) def div(self, y): @@ -273,7 +279,7 @@ def div(self, y): Parameters ---------- - y : ndarray + y : array_like Signal of length :attr:`n_edges` living on the edges. Returns @@ -282,34 +288,45 @@ def div(self, y): Divergence signal of length :attr:`n_vertices` living on the vertices. - See also + See Also -------- compute_differential_operator grad : compute the gradient of a vertex signal Examples -------- + + Non-directed graph and combinatorial Laplacian: + >>> graph = graphs.Path(4, directed=False, lap_type='combinatorial') >>> graph.compute_differential_operator() - >>> graph.div(np.array([2, -2, 0])) + >>> graph.div([2, -2, 0]) array([-2., 4., -2., 0.]) + Directed graph and combinatorial Laplacian: + >>> graph = graphs.Path(4, directed=True, lap_type='combinatorial') >>> graph.compute_differential_operator() - >>> graph.div(np.array([2, -2, 0])) + >>> graph.div([2, -2, 0]) array([-1.41421356, 2.82842712, -1.41421356, 0. ]) + Non-directed graph and normalized Laplacian: + >>> graph = graphs.Path(4, directed=False, lap_type='normalized') >>> graph.compute_differential_operator() - >>> graph.div(np.array([2, -2, 0])) + >>> graph.div([2, -2, 0]) array([-2. , 2.82842712, -1.41421356, 0. ]) + Directed graph and normalized Laplacian: + >>> graph = graphs.Path(4, directed=True, lap_type='normalized') >>> graph.compute_differential_operator() - >>> graph.div(np.array([2, -2, 0])) + >>> graph.div([2, -2, 0]) array([-2. , 2.82842712, -1.41421356, 0. 
]) """ - if self.Ne != y.shape[0]: - raise ValueError('Signal length should be the number of edges.') + y = np.asanyarray(y) + if y.shape[0] != self.Ne: + raise ValueError('First dimension must be the number of edges ' + 'G.Ne = {}, got {}.'.format(self.Ne, y.shape)) return self.D.dot(y) diff --git a/pygsp/graphs/fourier.py b/pygsp/graphs/fourier.py index 80f55769..3c215273 100644 --- a/pygsp/graphs/fourier.py +++ b/pygsp/graphs/fourier.py @@ -9,10 +9,10 @@ logger = utils.build_logger(__name__) -class GraphFourier(object): +class FourierMixIn(object): def _check_fourier_properties(self, name, desc): - if not hasattr(self, '_' + name): + if getattr(self, '_' + name) is None: self.logger.warning('The {} G.{} is not available, we need to ' 'compute the Fourier basis. Explicitly call ' 'G.compute_fourier_basis() once beforehand ' @@ -40,19 +40,26 @@ def e(self): def coherence(self): r"""Coherence of the Fourier basis. - The mutual coherence between the basis of Kronecker deltas on the graph - and the basis of graph Laplacian eigenvectors is defined as + The *mutual coherence* between the basis of Kronecker deltas and the + basis formed by the eigenvectors of the graph Laplacian is defined as - .. math:: \mu = \max_{\ell,i} | \langle U_\ell, \delta_i \rangle | + .. math:: \mu = \max_{\ell,i} \langle U_\ell, \delta_i \rangle = \max_{\ell,i} | U_{\ell, i} | - \in \left[ \frac{1}{\sqrt{N}}, 1 \right]. + \in \left[ \frac{1}{\sqrt{N}}, 1 \right], - It is a measure of the localization of the Fourier modes (Laplacian - eigenvectors). The smaller the value, the more localized the + where :math:`N` is the number of vertices, :math:`\delta_i \in + \mathbb{R}^N` denotes the Kronecker delta that is non-zero on vertex + :math:`v_i`, and :math:`U_\ell \in \mathbb{R}^N` denotes the + :math:`\ell^\text{th}` eigenvector of the graph Laplacian (i.e., the + :math:`\ell^\text{th}` Fourier mode). + + The coherence is a measure of the localization of the Fourier modes + (Laplacian eigenvectors). The larger the value, the more localized the eigenvectors can be. The extreme is a node that is disconnected from the rest of the graph: an eigenvector will be localized as a Kronecker - delta there. In the classical setting, Fourier modes (which are complex - exponentials) are completely delocalized, and the coherence equals one. + delta there (i.e., :math:`\mu = 1`). In the classical setting, Fourier + modes (which are complex exponentials) are completely delocalized, and + the coherence is minimal. The value is computed by :meth:`compute_fourier_basis`. @@ -74,22 +81,23 @@ def coherence(self): Localized eigenvectors. - >>> graph = graphs.Sensor(64, seed=20) + >>> graph = graphs.Sensor(64, seed=10) >>> graph.compute_fourier_basis() >>> minimum = 1 / np.sqrt(graph.n_vertices) >>> print('{:.2f} in [{:.2f}, 1]'.format(graph.coherence, minimum)) - 0.88 in [0.12, 1] + 0.75 in [0.12, 1] >>> >>> # Plot the most localized eigenvector. >>> import matplotlib.pyplot as plt - >>> idx = np.argmax(np.max(graph.U, axis=0)) - >>> _ = graph.plot(graph.U[:, idx]) + >>> idx = np.argmax(np.abs(graph.U)) + >>> idx_vertex, idx_fourier = np.unravel_index(idx, graph.U.shape) + >>> _ = graph.plot(graph.U[:, idx_fourier], highlight=idx_vertex) """ return self._check_fourier_properties('coherence', 'Fourier basis coherence') - def compute_fourier_basis(self, n_eigenvectors=None, recompute=False): + def compute_fourier_basis(self, n_eigenvectors=None): r"""Compute the (partial) Fourier basis of the graph (cached). 
The result is cached and accessible by the :attr:`U`, :attr:`e`, @@ -100,8 +108,6 @@ def compute_fourier_basis(self, n_eigenvectors=None, recompute=False): n_eigenvectors : int or `None` Number of eigenvectors to compute. If `None`, all eigenvectors are computed. (default: None) - recompute: bool - Force to recompute the Fourier basis if already existing. Notes ----- @@ -148,14 +154,13 @@ def compute_fourier_basis(self, n_eigenvectors=None, recompute=False): """ if n_eigenvectors is None: - n_eigenvectors = self.N + n_eigenvectors = self.n_vertices - if (hasattr(self, '_e') and hasattr(self, '_U') and not recompute - and n_eigenvectors <= len(self.e)): + if (self._U is not None and n_eigenvectors <= len(self._e)): return - assert self.L.shape == (self.N, self.N) - if self.N**2 * n_eigenvectors > 3000**3: + assert self.L.shape == (self.n_vertices, self.n_vertices) + if self.n_vertices**2 * n_eigenvectors > 3000**3: self.logger.warning( 'Computing the {0} eigendecomposition of a large matrix ({1} x' ' {1}) is expensive. Consider decreasing n_eigenvectors ' @@ -165,7 +170,7 @@ def compute_fourier_basis(self, n_eigenvectors=None, recompute=False): self.N)) # TODO: handle non-symmetric Laplacians. Test lap_type? - if n_eigenvectors == self.N: + if n_eigenvectors == self.n_vertices: self._e, self._U = np.linalg.eigh(self.L.toarray()) else: # fast partial eigendecomposition of hermitian matrices @@ -186,6 +191,7 @@ def compute_fourier_basis(self, n_eigenvectors=None, recompute=False): assert np.max(self._e) == self._e[-1] if n_eigenvectors == self.N: self._lmax = self._e[-1] + self._lmax_method = 'fourier' self._coherence = np.max(np.abs(self._U)) def gft(self, s): @@ -200,7 +206,7 @@ def gft(self, s): Parameters ---------- - s : ndarray + s : array_like Graph signal in the vertex domain. Returns @@ -219,9 +225,7 @@ def gft(self, s): True """ - if s.shape[0] != self.N: - raise ValueError('First dimension should be the number of nodes ' - 'G.N = {}, got {}.'.format(self.N, s.shape)) + s = self._check_signal(s) U = np.conjugate(self.U) # True Hermitian. (Although U is often real.) return np.tensordot(U, s, ([0], [0])) @@ -237,7 +241,7 @@ def igft(self, s_hat): Parameters ---------- - s_hat : ndarray + s_hat : array_like Graph signal in the Fourier domain. Returns @@ -256,7 +260,5 @@ def igft(self, s_hat): True """ - if s_hat.shape[0] != self.N: - raise ValueError('First dimension should be the number of nodes ' - 'G.N = {}, got {}.'.format(self.N, s_hat.shape)) + s_hat = self._check_signal(s_hat) return np.tensordot(self.U, s_hat, ([1], [0])) diff --git a/pygsp/graphs/fullconnected.py b/pygsp/graphs/fullconnected.py index eed61310..0f4c1dee 100644 --- a/pygsp/graphs/fullconnected.py +++ b/pygsp/graphs/fullconnected.py @@ -31,4 +31,4 @@ def __init__(self, N=10, **kwargs): W = np.ones((N, N)) - np.identity(N) plotting = {'limits': np.array([-1, 1, -1, 1])} - super(FullConnected, self).__init__(W=W, plotting=plotting, **kwargs) + super(FullConnected, self).__init__(W, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py index 39333c95..18b0fa95 100644 --- a/pygsp/graphs/graph.py +++ b/pygsp/graphs/graph.py @@ -1,108 +1,176 @@ # -*- coding: utf-8 -*- +from __future__ import division + +import os from collections import Counter import numpy as np from scipy import sparse from pygsp import utils -from . 
import fourier, difference # prevent circular import in Python < 3.5 +from .fourier import FourierMixIn +from .difference import DifferenceMixIn +from ._io import IOMixIn +from ._layout import LayoutMixIn -class Graph(fourier.GraphFourier, difference.GraphDifference): +class Graph(FourierMixIn, DifferenceMixIn, IOMixIn, LayoutMixIn): r"""Base graph class. - * Provide a common interface (and implementation) to graph objects. - * Can be instantiated to construct custom graphs from a weight matrix. + * Instantiate it to construct a graph from a (weighted) adjacency matrix. + * Provide a common interface (and implementation) for graph objects. * Initialize attributes for derived classes. Parameters ---------- - W : sparse matrix or ndarray - The weight matrix which encodes the graph. - lap_type : 'combinatorial', 'normalized' - The type of Laplacian to be computed by :func:`compute_laplacian` - (default is 'combinatorial'). - coords : ndarray - Vertices coordinates (default is None). + adjacency : sparse matrix or array_like + The (weighted) adjacency matrix of size n_vertices by n_vertices that + encodes the graph. + The data is copied except if it is a sparse matrix in CSR format. + lap_type : {'combinatorial', 'normalized'} + The kind of Laplacian to be computed by :meth:`compute_laplacian`. + coords : array_like + A matrix of size n_vertices by d that represents the coordinates of the + vertices in a d-dimensional embedding space. plotting : dict Plotting parameters. Attributes ---------- - N : int - the number of nodes / vertices in the graph. - Ne : int - the number of edges / links in the graph, i.e. connections between - nodes. - W : sparse matrix - the weight matrix which contains the weights of the connections. - It is represented as an N-by-N matrix of floats. - :math:`W_{i,j} = 0` means that there is no direct connection from - i to j. - L : sparse matrix - the graph Laplacian, an N-by-N matrix computed from W. + n_vertices or N : int + The number of vertices (nodes) in the graph. + n_edges or Ne : int + The number of edges (links) in the graph. + W : :class:`scipy.sparse.csr_matrix` + The adjacency matrix that contains the weights of the edges. + It is represented as an n_vertices by n_vertices matrix, where + :math:`W_{i,j}` is the weight of the edge :math:`(v_i, v_j)` from + vertex :math:`v_i` to vertex :math:`v_j`. :math:`W_{i,j} = 0` means + that there is no direct connection. + L : :class:`scipy.sparse.csr_matrix` + The graph Laplacian, an N-by-N matrix computed from W. lap_type : 'normalized', 'combinatorial' - the kind of Laplacian that was computed by :func:`compute_laplacian`. - coords : ndarray - vertices coordinates in 2D or 3D space. Used for plotting only. Default - is None. + The kind of Laplacian that was computed by :func:`compute_laplacian`. + signals : dict (string -> :class:`numpy.ndarray`) + Signals attached to the graph. + coords : :class:`numpy.ndarray` + Vertices coordinates in 2D or 3D space. Used for plotting only. plotting : dict - plotting parameters. + Plotting parameters. Examples -------- - >>> W = np.arange(4).reshape(2, 2) - >>> G = graphs.Graph(W) + + Define a simple graph. + + >>> graph = graphs.Graph([ + ... [0., 2., 0.], + ... [2., 0., 5.], + ... [0., 5., 0.], + ... 
]) + >>> graph + Graph(n_vertices=3, n_edges=2) + >>> graph.n_vertices, graph.n_edges + (3, 2) + >>> graph.W.toarray() + array([[0., 2., 0.], + [2., 0., 5.], + [0., 5., 0.]]) + >>> graph.d + array([1, 2, 1], dtype=int32) + >>> graph.dw + array([2., 7., 5.]) + >>> graph.L.toarray() + array([[ 2., -2., 0.], + [-2., 7., -5.], + [ 0., -5., 5.]]) + + Add some coordinates to plot it. + + >>> import matplotlib.pyplot as plt + >>> graph.set_coordinates([ + ... [0, 0], + ... [0, 1], + ... [1, 0], + ... ]) + >>> fig, ax = graph.plot() """ - def __init__(self, W, lap_type='combinatorial', coords=None, plotting={}): + def __init__(self, adjacency, lap_type='combinatorial', coords=None, + plotting={}): self.logger = utils.build_logger(__name__) - if len(W.shape) != 2 or W.shape[0] != W.shape[1]: - raise ValueError('W has incorrect shape {}'.format(W.shape)) + if not sparse.isspmatrix(adjacency): + adjacency = np.asanyarray(adjacency) + + if (adjacency.ndim != 2) or (adjacency.shape[0] != adjacency.shape[1]): + raise ValueError('Adjacency: must be a square matrix.') # CSR sparse matrices are the most efficient for matrix multiplication. # They are the sole sparse matrix type to support eliminate_zeros(). - if sparse.isspmatrix_csr(W): - self.W = W - else: - self.W = sparse.csr_matrix(W) + self._adjacency = sparse.csr_matrix(adjacency, copy=False) - # Don't keep edges of 0 weight. Otherwise Ne will not correspond to the - # real number of edges. Problematic when e.g. plotting. - self.W.eliminate_zeros() + if np.isnan(self._adjacency.sum()): + raise ValueError('Adjacency: there is a Not a Number (NaN).') + if np.isinf(self._adjacency.sum()): + raise ValueError('Adjacency: there is an infinite value.') + if self.has_loops(): + self.logger.warning('Adjacency: there are self-loops ' + '(non-zeros on the diagonal). ' + 'The Laplacian will not see them.') + if (self._adjacency < 0).nnz != 0: + self.logger.warning('Adjacency: there are negative edge weights.') - self.n_vertices = W.shape[0] + self.n_vertices = self._adjacency.shape[0] - # TODO: why would we ever want this? - # For large matrices it slows the graph construction by a factor 100. - # self.W = sparse.lil_matrix(self.W) + # Don't keep edges of 0 weight. Otherwise n_edges will not correspond + # to the real number of edges. Problematic when plotting. + self._adjacency.eliminate_zeros() + + self._directed = None + self._connected = None # Don't count edges two times if undirected. # Be consistent with the size of the differential operator. if self.is_directed(): - self.n_edges = self.W.nnz + self.n_edges = self._adjacency.nnz else: - diagonal = np.count_nonzero(self.W.diagonal()) - off_diagonal = self.W.nnz - diagonal + diagonal = np.count_nonzero(self._adjacency.diagonal()) + off_diagonal = self._adjacency.nnz - diagonal self.n_edges = off_diagonal // 2 + diagonal - self.check_weights() - - self.compute_laplacian(lap_type) - if coords is not None: - self.coords = coords + # TODO: self.coords should be None if unset. + self.coords = np.asanyarray(coords) self.plotting = {'vertex_size': 100, 'vertex_color': (0.12, 0.47, 0.71, 0.5), 'edge_color': (0.5, 0.5, 0.5, 0.5), 'edge_width': 2, - 'edge_style': '-'} + 'edge_style': '-', + 'highlight_color': 'C1', + 'normalize_intercept': .25} self.plotting.update(plotting) + self.signals = dict() + + # Attributes that are lazily computed. 
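+        # Each attribute is None until it is computed and cached, either by
+        # its property (e.g., A, d, dw, lmax) or by an explicit method (e.g.,
+        # compute_fourier_basis() fills _U, _e, and _coherence;
+        # compute_differential_operator() fills _D).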
+ self._A = None + self._d = None + self._dw = None + self._lmax = None + self._lmax_method = None + self._U = None + self._e = None + self._coherence = None + self._D = None + # self._L = None + + # TODO: what about Laplacian? Lazy as Fourier, or disallow change? + self.lap_type = lap_type + self.compute_laplacian(lap_type) # TODO: kept for backward compatibility. self.Ne = self.n_edges @@ -122,6 +190,32 @@ def __repr__(self, limit=None): s += '{}={}, '.format(key, value) return '{}({})'.format(self.__class__.__name__, s[:-2]) + def set_signal(self, signal, name): + r"""Attach a signal to the graph. + + Attached signals can be accessed (and modified or deleted) through the + :attr:`signals` dictionary. + + Parameters + ---------- + signal : array_like + A sequence that assigns a value to each vertex. + The value of the signal at vertex `i` is ``signal[i]``. + name : String + Name of the signal used as a key in the :attr:`signals` dictionary. + + Examples + -------- + >>> graph = graphs.Sensor(10) + >>> signal = np.arange(graph.n_vertices) + >>> graph.set_signal(signal, 'mysignal') + >>> graph.signals + {'mysignal': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])} + + """ + signal = self._check_signal(signal) + self.signals[name] = signal + def check_weights(self): r"""Check the characteristics of the weights matrix. @@ -182,7 +276,7 @@ def set_coordinates(self, kind='spring', **kwargs): Parameters ---------- - kind : string or array-like + kind : string or array_like Kind of coordinates to generate. It controls the position of the nodes when plotting the graph. Can either pass an array of size Nx2 or Nx3 to set the coordinates manually or the name of a layout @@ -202,7 +296,7 @@ def set_coordinates(self, kind='spring', **kwargs): """ if not isinstance(kind, str): - coords = np.asarray(kind).squeeze() + coords = np.asanyarray(kind).squeeze() check_1d = (coords.ndim == 1) check_2d_3d = (coords.ndim == 2) and (2 <= coords.shape[1] <= 3) if coords.shape[0] != self.N or not (check_1d or check_2d_3d): @@ -272,146 +366,195 @@ def set_coordinates(self, kind='spring', **kwargs): else: raise ValueError('Unexpected argument kind={}.'.format(kind)) - def subgraph(self, ind): - r"""Create a subgraph given indices. + def subgraph(self, vertices): + r"""Create a subgraph from a list of vertices. Parameters ---------- - ind : list - Nodes to keep + vertices : list + Vertices to keep. + Either a list of indices or an indicator function. Returns ------- - sub_G : Graph - Subgraph + subgraph : :class:`Graph` + Subgraph. Examples -------- - >>> W = np.arange(16).reshape(4, 4) - >>> G = graphs.Graph(W) - >>> ind = [1, 3] - >>> sub_G = G.subgraph(ind) + >>> graph = graphs.Graph([ + ... [0., 3., 0., 0.], + ... [3., 0., 4., 0.], + ... [0., 4., 0., 2.], + ... [0., 0., 2., 0.], + ... ]) + >>> graph = graph.subgraph([0, 2, 1]) + >>> graph.W.toarray() + array([[0., 0., 3.], + [0., 0., 4.], + [3., 4., 0.]]) """ - if not isinstance(ind, list) and not isinstance(ind, np.ndarray): - raise TypeError('The indices must be a list or a ndarray.') - - # N = len(ind) # Assigned but never used - - sub_W = self.W.tocsr()[ind, :].tocsc()[:, ind] - return Graph(sub_W) - - def is_connected(self, recompute=False): - r"""Check the strong connectivity of the graph (cached). - - It uses DFS travelling on graph to ensure that each node is visited. - For undirected graphs, starting at any vertex and trying to access all - others is enough. 
- For directed graphs, one needs to check that a random vertex is - accessible by all others - and can access all others. Thus, we can transpose the adjacency matrix - and compute again with the same starting point in both phases. - - Parameters - ---------- - recompute: bool - Force to recompute the connectivity if already known. + adjacency = self.W[vertices, :][:, vertices] + try: + coords = self.coords[vertices] + except AttributeError: + coords = None + graph = Graph(adjacency, self.lap_type, coords, self.plotting) + for name, signal in self.signals.items(): + graph.set_signal(signal[vertices], name) + return graph + + def is_connected(self): + r"""Check if the graph is connected (cached). + + A graph is connected if and only if there exists a (directed) path + between any two vertices. Returns ------- connected : bool - True if the graph is connected. + True if the graph is connected, False otherwise. + + Notes + ----- + + For undirected graphs, starting at a vertex and trying to visit all the + others is enough. + For directed graphs, one needs to check that a vertex can both be + visited by all the others and visit all the others. Examples -------- - >>> from scipy import sparse - >>> W = sparse.rand(10, 10, 0.2) - >>> G = graphs.Graph(W=W) - >>> connected = G.is_connected() + + Connected graph: + + >>> graph = graphs.Graph([ + ... [0, 3, 0, 0], + ... [3, 0, 4, 0], + ... [0, 4, 0, 2], + ... [0, 0, 2, 0], + ... ]) + >>> graph.is_connected() + True + + Disconnected graph: + + >>> graph = graphs.Graph([ + ... [0, 3, 0, 0], + ... [3, 0, 4, 0], + ... [0, 0, 0, 2], + ... [0, 0, 2, 0], + ... ]) + >>> graph.is_connected() + False + """ - if hasattr(self, '_connected') and not recompute: + if self._connected is not None: return self._connected - if self.is_directed(recompute=recompute): - adj_matrices = [self.A, self.A.T] - else: - adj_matrices = [self.A] + adjacencies = [self.W] + if self.is_directed(): + adjacencies.append(self.W.T) - for adj_matrix in adj_matrices: - visited = np.zeros(self.A.shape[0], dtype=bool) + for adjacency in adjacencies: + visited = np.zeros(self.n_vertices, dtype=np.bool) stack = set([0]) - while len(stack): - v = stack.pop() - if not visited[v]: - visited[v] = True + while stack: + vertex = stack.pop() - # Add indices of nodes not visited yet and accessible from - # v - stack.update(set([idx - for idx in adj_matrix[v, :].nonzero()[1] - if not visited[idx]])) + if visited[vertex]: + continue + visited[vertex] = True + + neighbors = adjacency[vertex].nonzero()[1] + stack.update(neighbors) - if not visited.all(): + if not np.all(visited): self._connected = False return self._connected self._connected = True return self._connected - def is_directed(self, recompute=False): + def is_directed(self): r"""Check if the graph has directed edges (cached). In this framework, we consider that a graph is directed if and - only if its weight matrix is non symmetric. - - Parameters - ---------- - recompute : bool - Force to recompute the directedness if already known. + only if its weight matrix is not symmetric. Returns ------- directed : bool - True if the graph is directed. - - Notes - ----- - Can also be used to check if a matrix is symmetrical + True if the graph is directed, False otherwise. Examples -------- Directed graph: - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 3, 0], ... [3, 0, 4], ... [0, 0, 0], ... 
]) - >>> graph = graphs.Graph(adjacency) >>> graph.is_directed() True Undirected graph: - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 3, 0], ... [3, 0, 4], ... [0, 4, 0], ... ]) - >>> graph = graphs.Graph(adjacency) >>> graph.is_directed() False """ - if hasattr(self, '_directed') and not recompute: - return self._directed - - self._directed = np.abs(self.W - self.W.T).sum() != 0 + if self._directed is None: + self._directed = (self.W != self.W.T).nnz != 0 return self._directed + def has_loops(self): + r"""Check if any vertex is connected to itself. + + A graph has self-loops if and only if the diagonal entries of its + adjacency matrix are not all zero. + + Returns + ------- + loops : bool + True if the graph has self-loops, False otherwise. + + Examples + -------- + + Without self-loops: + + >>> graph = graphs.Graph([ + ... [0, 3, 0], + ... [3, 0, 4], + ... [0, 0, 0], + ... ]) + >>> graph.has_loops() + False + + With a self-loop: + + >>> graph = graphs.Graph([ + ... [1, 3, 0], + ... [3, 0, 4], + ... [0, 0, 0], + ... ]) + >>> graph.has_loops() + True + + """ + return np.any(self.W.diagonal() != 0) + def extract_components(self): r"""Split the graph into connected components. @@ -431,7 +574,7 @@ def extract_components(self): >>> from scipy import sparse >>> W = sparse.rand(10, 10, 0.2) >>> W = utils.symmetrize(W) - >>> G = graphs.Graph(W=W) + >>> G = graphs.Graph(W) >>> components = G.extract_components() >>> has_sinks = 'sink' in components[0].info >>> sinks_0 = components[0].info['sink'] if has_sinks else [] @@ -451,7 +594,8 @@ def extract_components(self): # indices = [] # Assigned but never used while not visited.all(): - stack = set(np.nonzero(~visited)[0]) + # pick a node not visted yet + stack = set(np.nonzero(~visited)[0][[0]]) comp = [] while len(stack): @@ -502,19 +646,18 @@ def compute_laplacian(self, lap_type='combinatorial'): Parameters ---------- lap_type : {'combinatorial', 'normalized'} - The type of Laplacian to compute. Default is combinatorial. + The kind of Laplacian to compute. Default is combinatorial. Examples -------- Combinatorial and normalized Laplacians of an undirected graph. - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 2, 0], ... [2, 0, 1], ... [0, 1, 0], ... ]) - >>> graph = graphs.Graph(adjacency) >>> graph.compute_laplacian('combinatorial') >>> graph.L.toarray() array([[ 2., -2., 0.], @@ -528,12 +671,11 @@ def compute_laplacian(self, lap_type='combinatorial'): Combinatorial and normalized Laplacians of a directed graph. - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 2, 0], ... [2, 0, 1], ... [0, 0, 0], ... ]) - >>> graph = graphs.Graph(adjacency) >>> graph.compute_laplacian('combinatorial') >>> graph.L.toarray() array([[ 2. , -2. , 0. ], @@ -562,12 +704,21 @@ def compute_laplacian(self, lap_type='combinatorial'): >>> -1e-10 < G.e[0] < 1e-10 < G.e[-1] < 2*np.max(G.dw) True >>> G.compute_laplacian('normalized') - >>> G.compute_fourier_basis(recompute=True) + >>> G.compute_fourier_basis() >>> -1e-10 < G.e[0] < 1e-10 < G.e[-1] < 2 True """ + if lap_type != self.lap_type: + # Those attributes are invalidated when the Laplacian is changed. + # Alternative: don't allow the user to change the Laplacian. 
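+            # The spectrum, the Fourier basis, and the differential operator
+            # are all derived from the Laplacian, so their cached values must
+            # be recomputed on the next access.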
+ self._lmax = None + self._U = None + self._e = None + self._coherence = None + self._D = None + self.lap_type = lap_type if not self.is_directed(): @@ -589,6 +740,14 @@ def compute_laplacian(self, lap_type='combinatorial'): else: raise ValueError('Unknown Laplacian type {}'.format(lap_type)) + def _check_signal(self, s): + r"""Check if signal is valid.""" + s = np.asanyarray(s) + if s.shape[0] != self.n_vertices: + raise ValueError('First dimension must be the number of vertices ' + 'G.N = {}, got {}.'.format(self.N, s.shape)) + return s + def dirichlet_energy(self, x): r"""Compute the Dirichlet energy of a signal defined on the vertices. @@ -610,7 +769,7 @@ def dirichlet_energy(self, x): Parameters ---------- - x : ndarray + x : array_like Signal of length :attr:`n_vertices` living on the vertices. Returns @@ -618,14 +777,17 @@ def dirichlet_energy(self, x): energy : float The Dirichlet energy of the graph signal. - See also + See Also -------- grad : compute the gradient of a vertex signal Examples -------- + + Non-directed graph: + >>> graph = graphs.Path(5, directed=False) - >>> signal = np.array([0, 2, 2, 4, 4]) + >>> signal = [0, 2, 2, 4, 4] >>> graph.dirichlet_energy(signal) 8.0 >>> # The Dirichlet energy is indeed the squared norm of the gradient. @@ -633,8 +795,10 @@ def dirichlet_energy(self, x): >>> graph.grad(signal) array([2., 0., 2., 0.]) + Directed graph: + >>> graph = graphs.Path(5, directed=True) - >>> signal = np.array([0, 2, 2, 4, 4]) + >>> signal = [0, 2, 2, 4, 4] >>> graph.dirichlet_energy(signal) 4.0 >>> # The Dirichlet energy is indeed the squared norm of the gradient. @@ -643,8 +807,20 @@ def dirichlet_energy(self, x): array([1.41421356, 0. , 1.41421356, 0. ]) """ + x = self._check_signal(x) return x.T.dot(self.L.dot(x)) + @property + def W(self): + r"""Weighted adjacency matrix of the graph.""" + return self._adjacency + + @W.setter + def W(self, value): + # TODO: user can still do G.W[0, 0] = 1, or modify the passed W. + raise AttributeError('In-place modification of the graph is not ' + 'supported. Create another Graph object.') + @property def A(self): r"""Graph adjacency matrix (the binary version of W). @@ -653,20 +829,67 @@ def A(self): It is represented as an N-by-N matrix of booleans. :math:`A_{i,j}` is True if :math:`W_{i,j} > 0`. """ - if not hasattr(self, '_A'): + if self._A is None: self._A = self.W > 0 return self._A @property def d(self): - r"""The degree (the number of neighbors) of each node.""" - if not hasattr(self, '_d'): - self._d = np.asarray(self.A.sum(axis=1)).squeeze() + r"""The degree (number of neighbors) of vertices. + + For undirected graphs, the degree of a vertex is the number of vertices + it is connected to. + For directed graphs, the degree is the average of the in and out + degrees, where the in degree is the number of incoming edges, and the + out degree the number of outgoing edges. + + In both cases, the degree of the vertex :math:`v_i` is the average + between the number of non-zero values in the :math:`i`-th column (the + in degree) and the :math:`i`-th row (the out degree) of the weighted + adjacency matrix :attr:`W`. + + Examples + -------- + + Undirected graph: + + >>> graph = graphs.Graph([ + ... [0, 1, 0], + ... [1, 0, 2], + ... [0, 2, 0], + ... ]) + >>> print(graph.d) # Number of neighbors. + [1 2 1] + >>> print(graph.dw) # Weighted degree. + [1 3 2] + + Directed graph: + + >>> graph = graphs.Graph([ + ... [0, 1, 0], + ... [0, 0, 2], + ... [0, 2, 0], + ... ]) + >>> print(graph.d) # Number of neighbors. + [0.5 1.5 1. 
] + >>> print(graph.dw) # Weighted degree. + [0.5 2.5 2. ] + + """ + if self._d is None: + if not self.is_directed(): + # Shortcut for undirected graphs. + self._d = self.W.getnnz(axis=1) + # axis=1 faster for CSR (https://stackoverflow.com/a/16391764) + else: + degree_in = self.W.getnnz(axis=0) + degree_out = self.W.getnnz(axis=1) + self._d = (degree_in + degree_out) / 2 return self._d @property def dw(self): - r"""The weighted degree of nodes. + r"""The weighted degree of vertices. For undirected graphs, the weighted degree of the vertex :math:`v_i` is defined as @@ -685,19 +908,40 @@ def dw(self): Examples -------- - >>> graphs.Path(4, directed=False).dw - array([1., 2., 2., 1.]) - >>> graphs.Path(4, directed=True).dw - array([0.5, 1. , 1. , 0.5]) + + Undirected graph: + + >>> graph = graphs.Graph([ + ... [0, 1, 0], + ... [1, 0, 2], + ... [0, 2, 0], + ... ]) + >>> print(graph.d) # Number of neighbors. + [1 2 1] + >>> print(graph.dw) # Weighted degree. + [1 3 2] + + Directed graph: + + >>> graph = graphs.Graph([ + ... [0, 1, 0], + ... [0, 0, 2], + ... [0, 2, 0], + ... ]) + >>> print(graph.d) # Number of neighbors. + [0.5 1.5 1. ] + >>> print(graph.dw) # Weighted degree. + [0.5 2.5 2. ] """ - if not hasattr(self, '_dw'): - if not self.is_directed: + if self._dw is None: + if not self.is_directed(): + # Shortcut for undirected graphs. self._dw = np.ravel(self.W.sum(axis=0)) else: degree_in = np.ravel(self.W.sum(axis=0)) degree_out = np.ravel(self.W.sum(axis=1)) - self._dw = 0.5 * (degree_in + degree_out) + self._dw = (degree_in + degree_out) / 2 return self._dw @property @@ -707,7 +951,7 @@ def lmax(self): Can be exactly computed by :func:`compute_fourier_basis` or approximated by :func:`estimate_lmax`. """ - if not hasattr(self, '_lmax'): + if self._lmax is None: self.logger.warning('The largest eigenvalue G.lmax is not ' 'available, we need to estimate it. ' 'Explicitly call G.estimate_lmax() or ' @@ -716,7 +960,7 @@ def lmax(self): self.estimate_lmax() return self._lmax - def estimate_lmax(self, method='lanczos', recompute=False): + def estimate_lmax(self, method='lanczos'): r"""Estimate the Laplacian's largest eigenvalue (cached). The result is cached and accessible by the :attr:`lmax` property. @@ -731,8 +975,6 @@ def estimate_lmax(self, method='lanczos', recompute=False): Whether to estimate the largest eigenvalue with the implicitly restarted Lanczos method, or to return an upper bound on the spectrum of the Laplacian. - recompute : boolean - Force to recompute the largest eigenvalue. Default is false. Notes ----- @@ -755,16 +997,17 @@ def estimate_lmax(self, method='lanczos', recompute=False): >>> G.compute_fourier_basis() # True value. >>> print('{:.2f}'.format(G.lmax)) 13.78 - >>> G.estimate_lmax(recompute=True) # Estimate. + >>> G.estimate_lmax(method='lanczos') # Estimate. >>> print('{:.2f}'.format(G.lmax)) 13.92 - >>> G.estimate_lmax(method='bounds', recompute=True) # Upper bound. + >>> G.estimate_lmax(method='bounds') # Upper bound. >>> print('{:.2f}'.format(G.lmax)) 18.58 """ - if hasattr(self, '_lmax') and not recompute: + if method == self._lmax_method: return + self._lmax_method = method if method == 'lanczos': try: @@ -852,24 +1095,22 @@ def get_edge_list(self): Edge list of a directed graph. - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 3, 0], ... [3, 0, 4], ... [0, 0, 0], ... 
]) - >>> graph = graphs.Graph(adjacency) >>> sources, targets, weights = graph.get_edge_list() >>> list(sources), list(targets), list(weights) ([0, 1, 1], [1, 0, 2], [3, 3, 4]) Edge list of an undirected graph. - >>> adjacency = np.array([ + >>> graph = graphs.Graph([ ... [0, 3, 0], ... [3, 0, 4], ... [0, 4, 0], ... ]) - >>> graph = graphs.Graph(adjacency) >>> sources, targets, weights = graph.get_edge_list() >>> list(sources), list(targets), list(weights) ([0, 1], [1, 2], [3, 4]) @@ -908,105 +1149,3 @@ def plot_spectrogram(self, node_idx=None): r"""Docstring overloaded at import time.""" from pygsp.plotting import _plot_spectrogram _plot_spectrogram(self, node_idx=node_idx) - - def _fruchterman_reingold_layout(self, dim=2, k=None, pos=None, fixed=[], - iterations=50, scale=1.0, center=None, - seed=None): - # TODO doc - # fixed: list of nodes with fixed coordinates - # Position nodes using Fruchterman-Reingold force-directed algorithm. - - if center is None: - center = np.zeros((1, dim)) - - if np.shape(center)[1] != dim: - self.logger.error('Spring coordinates: center has wrong size.') - center = np.zeros((1, dim)) - - if pos is None: - dom_size = 1 - pos_arr = None - else: - # Determine size of existing domain to adjust initial positions - dom_size = np.max(pos) - pos_arr = np.random.RandomState(seed).uniform(size=(self.N, dim)) - pos_arr = pos_arr * dom_size + center - for i in range(self.N): - pos_arr[i] = np.asarray(pos[i]) - - if k is None and len(fixed) > 0: - # We must adjust k by domain size for layouts that are not near 1x1 - k = dom_size / np.sqrt(self.N) - - pos = _sparse_fruchterman_reingold(self.A, dim, k, pos_arr, - fixed, iterations, seed) - - if len(fixed) == 0: - pos = _rescale_layout(pos, scale=scale) + center - - return pos - - -def _sparse_fruchterman_reingold(A, dim, k, pos, fixed, iterations, seed): - # Position nodes in adjacency matrix A using Fruchterman-Reingold - nnodes = A.shape[0] - - # make sure we have a LIst of Lists representation - try: - A = A.tolil() - except Exception: - A = (sparse.coo_matrix(A)).tolil() - - if pos is None: - # random initial positions - pos = np.random.RandomState(seed).uniform(size=(nnodes, dim)) - - # optimal distance between nodes - if k is None: - k = np.sqrt(1.0 / nnodes) - - # simple cooling scheme. - # linearly step down by dt on each iteration so last iteration is size dt. 
-    t = 0.1
-    dt = t / float(iterations + 1)
-
-    displacement = np.zeros((dim, nnodes))
-    for iteration in range(iterations):
-        displacement *= 0
-        # loop over rows
-        for i in range(nnodes):
-            if i in fixed:
-                continue
-            # difference between this row's node position and all others
-            delta = (pos[i] - pos).T
-            # distance between points
-            distance = np.sqrt((delta**2).sum(axis=0))
-            # enforce minimum distance of 0.01
-            distance = np.where(distance < 0.01, 0.01, distance)
-            # the adjacency matrix row
-            Ai = np.asarray(A[i, :].toarray())
-            # displacement "force"
-            displacement[:, i] += \
-                (delta * (k * k / distance**2 - Ai * distance / k)).sum(axis=1)
-        # update positions
-        length = np.sqrt((displacement**2).sum(axis=0))
-        length = np.where(length < 0.01, 0.1, length)
-        pos += (displacement * t / length).T
-        # cool temperature
-        t -= dt
-
-    return pos
-
-
-def _rescale_layout(pos, scale=1):
-    # rescale to (-scale, scale) in all axes
-
-    # shift origin to (0,0)
-    lim = 0  # max coordinate for all axes
-    for i in range(pos.shape[1]):
-        pos[:, i] -= pos[:, i].mean()
-        lim = max(pos[:, i].max(), lim)
-    # rescale to (-scale,scale) in all directions, preserves aspect
-    for i in range(pos.shape[1]):
-        pos[:, i] *= scale / lim
-    return pos
diff --git a/pygsp/graphs/grid2d.py b/pygsp/graphs/grid2d.py
index 8277b27f..26bef79b 100644
--- a/pygsp/graphs/grid2d.py
+++ b/pygsp/graphs/grid2d.py
@@ -10,12 +10,23 @@ class Grid2d(Graph):
     r"""2-dimensional grid graph.
 
+    On the 2D grid, the graph Fourier transform (GFT) is the Kronecker product
+    between the GFT of two :class:`~pygsp.graphs.Path` graphs.
+
     Parameters
     ----------
     N1 : int
         Number of vertices along the first dimension.
     N2 : int
-        Number of vertices along the second dimension (default N1).
+        Number of vertices along the second dimension. Default is ``N1``.
+    diagonal : float
+        Value of the diagonal edges. Default is ``0.0``.
+
+    See Also
+    --------
+    Path : 1D line with even boundary conditions
+    Torus : Kronecker product of two ring graphs
+    Grid2dImgPatches
 
     Examples
     --------
@@ -27,7 +38,7 @@
 
     """
 
-    def __init__(self, N1=16, N2=None, **kwargs):
+    def __init__(self, N1=16, N2=None, diagonal=0.0, **kwargs):
 
         if N2 is None:
             N2 = N1
@@ -42,11 +53,26 @@
         diag_1 = np.ones(N - 1)
         diag_1[(N2 - 1)::N2] = 0
         diag_2 = np.ones(N - N2)
+
         W = sparse.diags(diagonals=[diag_1, diag_2],
                          offsets=[-1, -N2],
                          shape=(N, N),
                          format='csr',
                          dtype='float')
+
+        if min(N1, N2) > 1 and diagonal != 0.0:
+            # Connect each node with its diagonal neighbours.
+            diag_3 = np.full(N - N2 - 1, diagonal)
+            diag_4 = np.full(N - N2 + 1, diagonal)
+            diag_3[N2 - 1::N2] = 0
+            diag_4[0::N2] = 0
+            D = sparse.diags(diagonals=[diag_3, diag_4],
+                             offsets=[-N2 - 1, -N2 + 1],
+                             shape=(N, N),
+                             format='csr',
+                             dtype='float')
+            W += D
+
         W = utils.symmetrize(W, method='tril')
 
         x = np.kron(np.ones((N1, 1)), (np.arange(N2)/float(N2)).reshape(N2, 1))
@@ -57,7 +83,7 @@
         plotting = {"limits": np.array([-1. / N2, 1 + 1. / N2,
                                         1. / N1, 1 + 1.
/ N1])} - super(Grid2d, self).__init__(W=W, coords=coords, + super(Grid2d, self).__init__(W, coords=coords, plotting=plotting, **kwargs) def _get_extra_repr(self): diff --git a/pygsp/graphs/logo.py b/pygsp/graphs/logo.py index aee7adb2..8d2c47e2 100644 --- a/pygsp/graphs/logo.py +++ b/pygsp/graphs/logo.py @@ -30,5 +30,5 @@ def __init__(self, **kwargs): plotting = {"limits": np.array([0, 640, -400, 0])} - super(Logo, self).__init__(W=data['W'], coords=data['coords'], + super(Logo, self).__init__(data['W'], coords=data['coords'], plotting=plotting, **kwargs) diff --git a/pygsp/graphs/lowstretchtree.py b/pygsp/graphs/lowstretchtree.py index bb347ab9..a55de736 100644 --- a/pygsp/graphs/lowstretchtree.py +++ b/pygsp/graphs/lowstretchtree.py @@ -67,7 +67,7 @@ def __init__(self, k=6, **kwargs): "vertex_size": 75, "limits": np.array([0, 2**k + 1, 0, 2**k + 1])} - super(LowStretchTree, self).__init__(W=W, + super(LowStretchTree, self).__init__(W, coords=coords, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/minnesota.py b/pygsp/graphs/minnesota.py index 55c23c23..6e429c8e 100644 --- a/pygsp/graphs/minnesota.py +++ b/pygsp/graphs/minnesota.py @@ -53,7 +53,7 @@ def __init__(self, connected=True, **kwargs): # Binarize: 8 entries are equal to 2 instead of 1. A = (A > 0).astype(bool) - super(Minnesota, self).__init__(W=A, coords=data['xy'], + super(Minnesota, self).__init__(A, coords=data['xy'], plotting=plotting, **kwargs) def _get_extra_repr(self): diff --git a/pygsp/graphs/nngraphs/bunny.py b/pygsp/graphs/nngraphs/bunny.py index d9dac407..cc269c10 100644 --- a/pygsp/graphs/nngraphs/bunny.py +++ b/pygsp/graphs/nngraphs/bunny.py @@ -34,7 +34,5 @@ def __init__(self, **kwargs): 'distance': 8, } - super(Bunny, self).__init__(Xin=data['bunny'], - epsilon=0.02, NNtype='radius', - center=False, rescale=False, + super(Bunny, self).__init__(data['bunny'], kind='radius', radius=0.02, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/cube.py b/pygsp/graphs/nngraphs/cube.py index 820e401c..b9cb1d60 100644 --- a/pygsp/graphs/nngraphs/cube.py +++ b/pygsp/graphs/nngraphs/cube.py @@ -10,12 +10,12 @@ class Cube(NNGraph): Parameters ---------- - radius : float - Edge lenght (default = 1) nb_pts : int Number of vertices (default = 300) nb_dim : int Dimension (default = 3) + length : float + Edge length (default = 1) sampling : string Variance of the distance kernel (default = 'random') (Can now only be 'random') @@ -35,20 +35,23 @@ class Cube(NNGraph): """ def __init__(self, - radius=1, nb_pts=300, nb_dim=3, + length=1, sampling='random', seed=None, **kwargs): - self.radius = radius self.nb_pts = nb_pts self.nb_dim = nb_dim + self.length = length self.sampling = sampling self.seed = seed rs = np.random.RandomState(seed) + if length != 1: + raise NotImplementedError('Only length=1 is implemented.') + if self.nb_dim > 3: raise NotImplementedError("Dimension > 3 not supported yet!") @@ -89,12 +92,10 @@ def __init__(self, 'distance': 9, } - super(Cube, self).__init__(Xin=pts, k=10, - center=False, rescale=False, - plotting=plotting, **kwargs) + super(Cube, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): - attrs = {'radius': '{:.2f}'.format(self.radius), + attrs = {'length': '{:.2e}'.format(self.length), 'nb_pts': self.nb_pts, 'nb_dim': self.nb_dim, 'sampling': self.sampling, diff --git a/pygsp/graphs/nngraphs/grid2dimgpatches.py b/pygsp/graphs/nngraphs/grid2dimgpatches.py index d567748e..34168c00 100644 --- a/pygsp/graphs/nngraphs/grid2dimgpatches.py +++ 
b/pygsp/graphs/nngraphs/grid2dimgpatches.py @@ -17,6 +17,11 @@ class Grid2dImgPatches(Graph): kwargs : dict Parameters passed to :class:`ImgPatches`. + See Also + -------- + ImgPatches + Grid2d + Examples -------- >>> import matplotlib.pyplot as plt @@ -35,7 +40,7 @@ def __init__(self, img, aggregate=lambda Wp, Wg: Wp + Wg, **kwargs): self.Gp = ImgPatches(img, **kwargs) W = aggregate(self.Gp.W, self.Gg.W) - super(Grid2dImgPatches, self).__init__(W=W, + super(Grid2dImgPatches, self).__init__(W, coords=self.Gg.coords, plotting=self.Gg.plotting) diff --git a/pygsp/graphs/nngraphs/imgpatches.py b/pygsp/graphs/nngraphs/imgpatches.py index ea11be06..df681a3b 100644 --- a/pygsp/graphs/nngraphs/imgpatches.py +++ b/pygsp/graphs/nngraphs/imgpatches.py @@ -10,7 +10,8 @@ class ImgPatches(NNGraph): Extract a feature vector in the form of a patch for every pixel of an image, then construct a nearest-neighbor graph between these feature - vectors. The feature matrix, i.e. the patches, can be found in :attr:`Xin`. + vectors. The feature matrix, i.e., the patches, can be found in + :attr:`patches`. Parameters ---------- @@ -22,6 +23,10 @@ class ImgPatches(NNGraph): kwargs : dict Parameters passed to :class:`NNGraph`. + See Also + -------- + Grid2dImgPatches + Notes ----- The feature vector of a pixel `i` will consist of the stacking of the @@ -35,9 +40,10 @@ class ImgPatches(NNGraph): >>> from skimage import data, img_as_float >>> img = img_as_float(data.camera()[::64, ::64]) >>> G = graphs.ImgPatches(img, patch_shape=(3, 3)) - >>> print('{} nodes ({} x {} pixels)'.format(G.Xin.shape[0], *img.shape)) + >>> N, d = G.patches.shape + >>> print('{} nodes ({} x {} pixels)'.format(N, *img.shape)) 64 nodes (8 x 8 pixels) - >>> print('{} features per node'.format(G.Xin.shape[1])) + >>> print('{} features per node'.format(d)) 9 features per node >>> G.set_coordinates(kind='spring', seed=42) >>> fig, axes = plt.subplots(1, 2) @@ -83,16 +89,16 @@ def __init__(self, img, patch_shape=(3, 3), **kwargs): # Alternative: sklearn.feature_extraction.image.extract_patches_2d. # sklearn has much less dependencies than skimage. try: - import skimage + from skimage.util import view_as_windows except Exception as e: raise ImportError('Cannot import skimage, which is needed to ' 'extract patches. Try to install it with ' 'pip (or conda) install scikit-image.
' 'Original exception: {}'.format(e)) - patches = skimage.util.view_as_windows(img, window_shape=window_shape) - patches = patches.reshape((h * w, r * c * d)) + self.patches = view_as_windows(img, window_shape) + self.patches = self.patches.reshape((h * w, r * c * d)) - super(ImgPatches, self).__init__(patches, **kwargs) + super(ImgPatches, self).__init__(self.patches, **kwargs) def _get_extra_repr(self): attrs = dict(patch_shape=self.patch_shape) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 2cdacf73..22246bfa 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- -import traceback +from __future__ import division + +from functools import partial import numpy as np from scipy import sparse, spatial @@ -8,192 +10,532 @@ from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 + _logger = utils.build_logger(__name__) -def _import_pfl(): +def _scipy_pdist(features, metric, order, kind, k, radius, params): + if params: + raise ValueError('unexpected parameters {}'.format(params)) + metric = 'cityblock' if metric == 'manhattan' else metric + metric = 'chebyshev' if metric == 'max_dist' else metric + params = dict(metric=metric) + if metric == 'minkowski': + params['p'] = order + dist = spatial.distance.pdist(features, **params) + dist = spatial.distance.squareform(dist) + if kind == 'knn': + neighbors = np.argsort(dist)[:, :k+1] + distances = np.take_along_axis(dist, neighbors, axis=-1) + elif kind == 'radius': + distances = [] + neighbors = [] + for distance in dist: + neighbor = np.flatnonzero(distance < radius) + neighbors.append(neighbor) + distances.append(distance[neighbor]) + return neighbors, distances + + +def _scipy_kdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.KDTree(features, **params) + params = dict(p=order, eps=eps) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = None + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + return neighbors, distances + + +def _scipy_ckdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-ckdtree') + eps = params.pop('eps', 0) + tree = spatial.cKDTree(features, **params) + params = dict(p=order, eps=eps, n_jobs=-1) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = features.shape[0] # number of vertices + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + if kind == 'knn': + return neighbors, distances + elif kind == 'radius': + dist = [] + neigh = [] + for distance, neighbor in zip(distances, neighbors): + mask = (distance != np.inf) + dist.append(distance[mask]) + neigh.append(neighbor[mask]) + return neigh, dist + + +def _flann(features, metric, order, kind, k, radius, params): + if metric == 'max_dist': + raise ValueError('flann gives wrong results for metric="max_dist".') + try: + import cyflann as cfl + except Exception as e: + raise ImportError('Cannot import cyflann. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install cyflann.
' + 'Original exception: {}'.format(e)) + cfl.set_distance_type(metric, order=order) + index = cfl.FLANNIndex() + index.build_index(features, **params) + # I tried changing the algorithm and testing performance on huge matrices, + # but the default parameters seems to work best. + if kind == 'knn': + neighbors, distances = index.nn_index(features, k+1) + if metric == 'euclidean': + np.sqrt(distances, out=distances) + elif metric == 'minkowski': + np.power(distances, 1/order, out=distances) + elif kind == 'radius': + distances = [] + neighbors = [] + if metric == 'euclidean': + radius = radius**2 + elif metric == 'minkowski': + radius = radius**order + n_vertices, _ = features.shape + for vertex in range(n_vertices): + neighbor, distance = index.nn_radius(features[vertex, :], radius) + distances.append(distance) + neighbors.append(neighbor) + if metric == 'euclidean': + distances = list(map(np.sqrt, distances)) + elif metric == 'minkowski': + distances = list(map(lambda d: np.power(d, 1/order), distances)) + index.free_index() + return neighbors, distances + + +def _nmslib(features, metric, order, kind, k, _, params): + if kind == 'radius': + raise ValueError('nmslib does not support kind="radius".') + if metric == 'minkowski': + raise ValueError('nmslib does not support metric="minkowski".') try: - import pyflann as pfl + import nmslib as nms except Exception as e: - raise ImportError('Cannot import pyflann. Choose another nearest ' - 'neighbors method or try to install it with ' - 'pip (or conda) install pyflann (or pyflann3). ' + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install nmslib. ' 'Original exception: {}'.format(e)) - return pfl + n_vertices, _ = features.shape + params_index = params.pop('index', None) + params_query = params.pop('query', None) + metric = 'l2' if metric == 'euclidean' else metric + metric = 'l1' if metric == 'manhattan' else metric + metric = 'linf' if metric == 'max_dist' else metric + index = nms.init(space=metric, **params) + index.addDataPointBatch(features) + index.createIndex(params_index) + if params_query is not None: + index.setQueryTimeParams(params_query) + results = index.knnQueryBatch(features, k=k+1) + neighbors, distances = zip(*results) + distances = np.concatenate(distances).reshape(n_vertices, k+1) + neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) + return neighbors, distances class NNGraph(Graph): - r"""Nearest-neighbor graph from given point cloud. - + r"""Nearest-neighbor graph. + The nearest-neighbor graph is built from a set of features, where the edge + weight between vertices :math:`v_i` and :math:`v_j` is given by + .. math:: A(i,j) = k \left( \frac{d(v_i, v_j)}{\sigma} \right), + where :math:`d(v_i, v_j)` is a distance measure between some representation + (the features) of :math:`v_i` and :math:`v_j`, :math:`k` is a kernel + function that transforms a distance in :math:`[0, \infty]` to a similarity + measure generally in :math:`[0, 1]`, and :math:`\sigma` is the kernel width. + For example, the features might be the 3D coordinates of points in a point + cloud. Then, if ``metric='euclidean'`` and ``kernel='gaussian'`` (the + defaults), :math:`A(i,j) = \exp(-\log(2) \| x_i - x_j \|_2^2 / \sigma^2)`, + where :math:`x_i` is the 3D position of vertex :math:`v_i`. 
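To make the weighting scheme above concrete, here is a minimal numpy sketch of a single edge weight, assuming the default Euclidean metric and Gaussian kernel and a hand-picked kernel width ``sigma``::

    import numpy as np

    x_i = np.array([0.0, 0.0, 0.0])  # 3D position of vertex v_i
    x_j = np.array([0.5, 0.5, 0.0])  # 3D position of vertex v_j
    sigma = 1.0                      # kernel width (assumed)

    d = np.linalg.norm(x_i - x_j)            # Euclidean distance
    w = np.exp(-np.log(2) * (d / sigma)**2)  # Gaussian similarity in [0, 1]

Note that ``w`` equals 0.5 when ``d`` equals ``sigma``, the convention that all the pre-defined kernels follow.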
+ The similarity matrix :math:`A` is sparsified by either keeping the ``k`` + closest vertices for each vertex (if ``kind='knn'``), or by setting to zero + the similarity when the distance is greater than ``radius`` (if ``kind='radius'``). Parameters ---------- - Xin : ndarray - Input points, Should be an `N`-by-`d` matrix, where `N` is the number - of nodes in the graph and `d` is the dimension of the feature space. - NNtype : string, optional - Type of nearest neighbor graph to create. The options are 'knn' for - k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default - is 'knn'). - use_flann : bool, optional - Use Fast Library for Approximate Nearest Neighbors (FLANN) or not. - (default is False) - center : bool, optional - Center the data so that it has zero mean (default is True) - rescale : bool, optional - Rescale the data so that it lies in a l2-sphere (default is True) - k : int, optional - Number of neighbors for knn (default is 10) - sigma : float, optional - Width of the similarity kernel. - By default, it is set to the average of the nearest neighbor distance. - epsilon : float, optional - Radius for the epsilon-neighborhood search (default is 0.01) - plotting : dict, optional - Dictionary of plotting parameters. See :obj:`pygsp.plotting`. - (default is {}) - symmetrize_type : string, optional - Type of symmetrization to use for the adjacency matrix. See - :func:`pygsp.utils.symmetrization` for the options. - (default is 'average') - dist_type : string, optional - Type of distance to compute. See - :func:`pyflann.index.set_distance_type` for possible options. - (default is 'euclidean') + features : ndarray + An `N`-by-`d` matrix, where `N` is the number of nodes in the graph and + `d` is the number of features. + standardize : bool, optional + Whether to rescale the features so that each feature has a mean of 0 + and standard deviation of 1 (unit variance). + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. + * ``'euclidean'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. + * ``'manhattan'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. + * ``'minkowski'`` generalizes the above and defines distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_p` + where :math:`p` is the ``order`` of the norm. + * ``'max_dist'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(|x_i - x_j|)`, where + the maximum is taken over the elements of the vector. + More metrics may be supported for some backends. + Please refer to the documentation of the chosen backend. order : float, optional - Only used if dist_type is 'minkowski'; represents the order of the - Minkowski distance. (default is 0) - + The order of the Minkowski distance for ``metric='minkowski'``. + kind : {'knn', 'radius'}, optional + Kind of nearest neighbor graph to create. Either ``'knn'`` for + k-nearest neighbors or ``'radius'`` for epsilon-nearest neighbors. + k : int, optional + Number of neighbors considered when building a k-NN graph with + ``kind='knn'``. + radius : float or {'estimate', 'estimate-knn'}, optional + Radius of the ball when building a radius graph with ``kind='radius'``. + It is hard to set an optimal radius. If too small, some vertices won't + be connected to any other vertex. If too high, vertices will be + connected to many other vertices and the graph won't be sparse (high + average degree).
If no good radius is known a priori, we can estimate + one. ``'estimate'`` sets the radius as the expected average distance + between vertices for a uniform sampling of the ambient space. + ``'estimate-knn'`` first builds a knn graph and sets the radius to the + average distance. ``'estimate-knn'`` usually gives a better estimation + but is more costly. ``'estimate'`` can be better in low dimension. + kernel : string or function + The function :math:`k` that transforms a distance to a similarity. + The following kernels are pre-defined. + * ``'gaussian'`` defines the Gaussian, also known as the radial basis + function (RBF), kernel :math:`k(d) = \exp(-\log(2) d^2)`. + * ``'exponential'`` defines the kernel :math:`k(d) = \exp(-\log(2) d)`. + * ``'rectangular'`` returns 1 if :math:`d < 1` and 0 otherwise. + * ``'triangular'`` defines the kernel :math:`k(d) = \max(1 - d/2, 0)`. + * Other kernels are ``'tricube'``, ``'triweight'``, ``'quartic'``, + ``'epanechnikov'``, ``'logistic'``, and ``'sigmoid'``. + See `Wikipedia `_. + Another option is to pass a function that takes a vector of pairwise + distances and returns the similarities. All the predefined kernels + return a similarity of 0.5 when the distance is one. + An example of custom kernel is ``kernel=lambda d: d.min() / d``. + kernel_width : float, optional + Control the width, also known as the bandwidth, :math:`\sigma` of the + kernel. It scales the distances as ``distances / kernel_width`` before + calling the kernel function. + By default, it is set to the average of all computed distances for + ``kind='knn'`` and to half the radius for ``kind='radius'``. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. That is an efficient strategy for low-dimensional spaces. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + That is the default. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. + You can look at this `benchmark + `_ to get an idea of the + relative performance of those backends. It's nonetheless wise to run + some tests on your own data. + kwargs : dict + Parameters to be passed to the :class:`Graph` constructor or the + backend library. Examples -------- + Construction of a graph from a set of features. >>> import matplotlib.pyplot as plt - >>> X = np.random.RandomState(42).uniform(size=(30, 2)) - >>> G = graphs.NNGraph(X) + >>> rs = np.random.RandomState(42) + >>> features = rs.uniform(size=(30, 2)) + >>> G = graphs.NNGraph(features) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=5) >>> _ = G.plot(ax=axes[1]) - + Radius versus knn graph. 
+ >>> features = rs.uniform(size=(100, 3)) + >>> fig, ax = plt.subplots() + >>> G = graphs.NNGraph(features, kind='radius', radius=0.2964) + >>> label = 'radius graph ({} edges)'.format(G.n_edges) + >>> _ = ax.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> G = graphs.NNGraph(features, kind='knn', k=6) + >>> label = 'knn graph ({} edges)'.format(G.n_edges) + >>> _ = ax.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> _ = ax.legend() + >>> _ = ax.set_title('edge weights') + Control of the sparsity of knn and radius graphs. + >>> features = rs.uniform(size=(100, 3)) + >>> n_edges = dict(knn=[], radius=[]) + >>> n_neighbors = np.arange(1, 100, 5) + >>> radiuses = np.arange(0.05, 1.5, 0.05) + >>> for k in n_neighbors: + ... G = graphs.NNGraph(features, kind='knn', k=k) + ... n_edges['knn'].append(G.n_edges) + >>> for radius in radiuses: + ... G = graphs.NNGraph(features, kind='radius', radius=radius) + ... n_edges['radius'].append(G.n_edges) + >>> fig, axes = plt.subplots(1, 2, sharey=True) + >>> _ = axes[0].plot(n_neighbors, n_edges['knn']) + >>> _ = axes[1].plot(radiuses, n_edges['radius']) + >>> _ = axes[0].set_ylabel('number of edges') + >>> _ = axes[0].set_xlabel('number of neighbors (knn graph)') + >>> _ = axes[1].set_xlabel('radius (radius graph)') + >>> _ = fig.suptitle('Sparsity') + Choice of metric and the curse of dimensionality. + >>> fig, axes = plt.subplots(1, 2) + >>> for dim, ax in zip([3, 30], axes): + ... features = rs.uniform(size=(100, dim)) + ... for metric in ['euclidean', 'manhattan', 'max_dist', 'cosine']: + ... G = graphs.NNGraph(features, metric=metric, + ... backend='scipy-pdist') + ... _ = ax.hist(G.W.data, bins=20, label=metric, alpha=0.5) + ... _ = ax.legend() + ... _ = ax.set_title('edge weights, {} dimensions'.format(dim)) + Choice of kernel. + >>> fig, axes = plt.subplots(1, 2) + >>> width = 0.3 + >>> distances = np.linspace(0, 1, 200) + >>> for name, kernel in graphs.NNGraph._kernels.items(): + ... _ = axes[0].plot(distances, kernel(distances / width), label=name) + >>> _ = axes[0].set_xlabel('distance [0, inf]') + >>> _ = axes[0].set_ylabel('similarity [0, 1]') + >>> _ = axes[0].legend(loc='upper right') + >>> features = rs.uniform(size=(100, 3)) + >>> for kernel in ['gaussian', 'triangular', 'tricube', 'exponential']: + ... G = graphs.NNGraph(features, kernel=kernel) + ... _ = axes[1].hist(G.W.data, bins=20, label=kernel, alpha=0.5) + >>> _ = axes[1].legend() + >>> _ = axes[1].set_title('edge weights') + Choice of kernel width. + >>> fig, axes = plt.subplots() + >>> for width in [.2, .3, .4, .6, .8, None]: + ... G = graphs.NNGraph(features, kernel_width=width) + ... label = 'width = {:.2f}'.format(G.kernel_width) + ... _ = axes.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> _ = axes.legend(loc='upper left') + >>> _ = axes.set_title('edge weights') + Choice of backend. Compare on your data! + >>> import time + >>> sizes = [300, 1000, 3000] + >>> dims = [3, 100] + >>> backends = ['scipy-pdist', 'scipy-kdtree', 'scipy-ckdtree', 'flann', + ... 'nmslib'] + >>> times = np.full((len(sizes), len(dims), len(backends)), np.nan) + >>> for i, size in enumerate(sizes): + ... for j, dim in enumerate(dims): + ... for k, backend in enumerate(backends): + ... if (size * dim) > 1e4 and backend == 'scipy-kdtree': + ... continue # too slow + ... features = rs.uniform(size=(size, dim)) + ... start = time.time() + ... _ = graphs.NNGraph(features, backend=backend) + ... 
times[i][j][k] = time.time() - start + >>> fig, axes = plt.subplots(1, 2, sharey=True) + >>> for j, (dim, ax) in enumerate(zip(dims, axes)): + ... for k, backend in enumerate(backends): + ... _ = ax.loglog(sizes, times[:, j, k], '.-', label=backend) + ... _ = ax.set_title('{} dimensions'.format(dim)) + ... _ = ax.set_xlabel('number of vertices') + >>> _ = axes[0].set_ylabel('execution time [s]') + >>> _ = axes[1].legend(loc='upper left') """ - def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, - rescale=True, k=10, sigma=None, epsilon=0.01, - plotting={}, symmetrize_type='average', dist_type='euclidean', - order=0, **kwargs): - - self.Xin = Xin - self.NNtype = NNtype - self.use_flann = use_flann - self.center = center - self.rescale = rescale + def __init__(self, features, standardize=False, + metric='euclidean', order=3, + kind='knn', k=10, radius='estimate-knn', + kernel='gaussian', kernel_width=None, + backend='scipy-ckdtree', + **kwargs): + + # features is stored in coords, potentially standardized + self.standardize = standardize + self.metric = metric + self.kind = kind + self.kernel = kernel self.k = k - self.sigma = sigma - self.epsilon = epsilon - self.symmetrize_type = symmetrize_type - self.dist_type = dist_type - self.order = order - - N, d = np.shape(self.Xin) - Xout = self.Xin - - if k >= N: - raise ValueError('The number of neighbors (k={}) must be smaller ' - 'than the number of nodes ({}).'.format(k, N)) - - if self.center: - Xout = self.Xin - np.kron(np.ones((N, 1)), - np.mean(self.Xin, axis=0)) - - if self.rescale: - bounding_radius = 0.5 * np.linalg.norm(np.amax(Xout, axis=0) - - np.amin(Xout, axis=0), 2) - scale = np.power(N, 1. / float(min(d, 3))) / 10. - Xout *= scale / bounding_radius - - # Translate distance type string to corresponding Minkowski order. - dist_translation = {"euclidean": 2, - "manhattan": 1, - "max_dist": np.inf, - "minkowski": order - } - - if self.NNtype == 'knn': - spi = np.zeros((N * k)) - spj = np.zeros((N * k)) - spv = np.zeros((N * k)) - - if self.use_flann: - pfl = _import_pfl() - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - - # Default FLANN parameters (I tried changing the algorithm and - # testing performance on huge matrices, but the default one - # seems to work best). - NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1), - algorithm='kdtree') - - else: - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=(k + 1), - p=dist_translation[dist_type]) - - if self.sigma is None: - self.sigma = np.mean(D[:, 1:]) # Discard distance to self. - - for i in range(N): - spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) - spj[i * k:(i + 1) * k] = NN[i, 1:] - spv[i * k:(i + 1) * k] = np.exp(-np.power(D[i, 1:], 2) / - float(self.sigma)) - - elif self.NNtype == 'radius': - - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon, - p=dist_translation[dist_type]) - if self.sigma is None: - # Discard distance to self. 
- self.sigma = np.mean([np.mean(d[1:]) for d in D]) - count = 0 - for i in range(N): - count = count + len(NN[i]) - - spi = np.zeros((count)) - spj = np.zeros((count)) - spv = np.zeros((count)) - - start = 0 - for i in range(N): - leng = len(NN[i]) - 1 - spi[start:start + leng] = np.kron(np.ones((leng)), i) - spj[start:start + leng] = NN[i][1:] - spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / - float(self.sigma)) - start = start + leng - + self.backend = backend + + features = np.asanyarray(features) + if features.ndim != 2: + raise ValueError('features should be #vertices x dimensionality') + n_vertices, dimensionality = features.shape + + params_graph = dict() + for key in ['lap_type', 'plotting']: + try: + params_graph[key] = kwargs.pop(key) + except KeyError: + pass + + if standardize: + # Don't alter the original data (users would be surprised). + features = features - np.mean(features, axis=0) + features /= np.std(features, axis=0) + + # Order consistent with metric (used by kdtree and ckdtree). + _orders = { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': order, + } + order = _orders.pop(metric, None) + + if kind == 'knn': + if not 1 <= k < n_vertices: + raise ValueError('The number of neighbors (k={}) must be ' + 'greater than 0 and smaller than the number ' + 'of vertices ({}).'.format(k, n_vertices)) + radius = None + elif kind == 'radius': + if radius == 'estimate': + maximums = np.amax(features, axis=0) + minimums = np.amin(features, axis=0) + distance_max = np.linalg.norm(maximums - minimums, order) + radius = distance_max / np.power(n_vertices, 1/dimensionality) + elif radius == 'estimate-knn': + graph = NNGraph(features, standardize=standardize, + metric=metric, order=order, kind='knn', k=k, + kernel_width=None, backend=backend, **kwargs) + radius = graph.kernel_width + elif radius <= 0: + raise ValueError('The radius must be greater than 0.') + self.k = None else: - raise ValueError('Unknown NNtype {}'.format(self.NNtype)) - - W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) + raise ValueError('Invalid kind "{}".'.format(kind)) + + try: + function = globals()['_' + backend.replace('-', '_')] + except KeyError: + raise ValueError('Invalid backend "{}".'.format(backend)) + + + neighbors, distances = function(features, metric, order, + kind, k, radius, kwargs) + # Keep the computed distances around for later inspection. + self.distances = distances + + n_edges = [len(n) - 1 for n in neighbors] # remove distance to self + + if kind == 'radius': + n_disconnected = np.sum(np.asarray(n_edges) == 0) + if n_disconnected > 0: + _logger.warning('{} vertices (out of {}) are disconnected. ' + 'Consider increasing the radius or setting ' + 'kind=knn.'.format(n_disconnected, n_vertices)) + + value = np.empty(sum(n_edges), dtype=np.float) + row = np.empty_like(value, dtype=np.int) + col = np.empty_like(value, dtype=np.int) + start = 0 + for vertex in range(n_vertices): + if kind == 'knn': + assert n_edges[vertex] == k + end = start + n_edges[vertex] + value[start:end] = distances[vertex][1:] + row[start:end] = np.full(n_edges[vertex], vertex) + col[start:end] = neighbors[vertex][1:] + start = end + W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(W - W.T).sum() is as costly as the symmetrization itself.
+ W = utils.symmetrize(W, method='fill') + + if kernel_width is None: + if kind == 'knn': + kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan + elif kind == 'radius': + kernel_width = radius / 2 + + if not callable(kernel): + try: + kernel = self._kernels[kernel] + except KeyError: + raise ValueError('Unknown kernel {}.'.format(kernel)) + + assert np.all(W.data >= 0), 'Distance must be in [0, inf].' + W.data = kernel(W.data / kernel_width) + if not np.all((W.data >= 0) & (W.data <= 1)): + _logger.warning('Kernel returned similarity not in [0, 1].') - # Sanity check - if np.shape(W)[0] != np.shape(W)[1]: - raise ValueError('Weight matrix W is not square') - - # Enforce symmetry. Note that checking symmetry with - # np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method=symmetrize_type) + self.order = order + self.radius = radius + self.kernel_width = kernel_width - super(NNGraph, self).__init__(W=W, plotting=plotting, - coords=Xout, **kwargs) + super(NNGraph, self).__init__(W, coords=features, **params_graph) def _get_extra_repr(self): - return {'NNtype': self.NNtype, - 'use_flann': self.use_flann, - 'center': self.center, - 'rescale': self.rescale, - 'k': self.k, - 'sigma': '{:.2f}'.format(self.sigma), - 'epsilon': '{:.2f}'.format(self.epsilon), - 'symmetrize_type': self.symmetrize_type, - 'dist_type': self.dist_type, - 'order': self.order} + attrs = { + 'standardize': self.standardize, + 'metric': self.metric, + 'order': self.order, + 'kind': self.kind, + } + if self.k is not None: + attrs['k'] = self.k + if self.radius is not None: + attrs['radius'] = '{:.2e}'.format(self.radius) + attrs.update({ + 'kernel': '{}'.format(self.kernel), + 'kernel_width': '{:.2e}'.format(self.kernel_width), + 'backend': self.backend, + }) + return attrs + + @staticmethod + def _kernel_rectangular(distance): + return (distance < 1).astype(np.float) + + @staticmethod + def _kernel_triangular(distance, value_at_one=0.5): + distance = value_at_one * distance + return np.maximum(1 - distance, 0) + + @staticmethod + def _kernel_exponential(distance, power=1, value_at_one=0.5): + cst = np.log(value_at_one) + return np.exp(cst * distance**power) + + @staticmethod + def _kernel_powers(distance, pow1, pow2, value_at_one=0.5): + cst = (1 - value_at_one**(1/pow2))**(1/pow1) + distance = np.clip(cst * distance, 0, 1) + return (1 - distance**pow1)**pow2 + + @staticmethod + def _kernel_logistic(distance, value_at_one=0.5): + cst = 4 / value_at_one - 2 + cst = np.log(0.5 * (cst + np.sqrt(cst**2 - 4))) + distance = cst * distance + return 4 / (np.exp(distance) + 2 + np.exp(-distance)) + + @staticmethod + def _kernel_sigmoid(distance, value_at_one=0.5): + cst = 2 / value_at_one + cst = np.log(0.5 * (cst + np.sqrt(cst**2 - 4))) + distance = cst * distance + return 2 / (np.exp(distance) + np.exp(-distance)) + + _kernels = { + 'rectangular': _kernel_rectangular.__func__, + 'triangular': _kernel_triangular.__func__, + + 'exponential': _kernel_exponential.__func__, + 'gaussian': partial(_kernel_exponential.__func__, power=2), + + 'tricube': partial(_kernel_powers.__func__, pow1=3, pow2=3), + 'triweight': partial(_kernel_powers.__func__, pow1=2, pow2=3), + 'quartic': partial(_kernel_powers.__func__, pow1=2, pow2=2), + 'epanechnikov': partial(_kernel_powers.__func__, pow1=2, pow2=1), + + 'logistic': _kernel_logistic.__func__, + 'sigmoid': _kernel_sigmoid.__func__, + } diff --git a/pygsp/graphs/nngraphs/sensor.py b/pygsp/graphs/nngraphs/sensor.py index ac623a50..09764b02 100644 --- 
a/pygsp/graphs/nngraphs/sensor.py +++ b/pygsp/graphs/nngraphs/sensor.py @@ -75,9 +75,7 @@ def __init__(self, N=64, k=6, distributed=False, seed=None, **kwargs): coords = rs.uniform(0, 1, (N, 2)) - super(Sensor, self).__init__(Xin=coords, k=k, - rescale=False, center=False, - plotting=plotting, **kwargs) + super(Sensor, self).__init__(coords, k=k, plotting=plotting, **kwargs) def _get_extra_repr(self): return {'k': self.k, diff --git a/pygsp/graphs/nngraphs/sphere.py b/pygsp/graphs/nngraphs/sphere.py index e375b5b4..dead44ff 100644 --- a/pygsp/graphs/nngraphs/sphere.py +++ b/pygsp/graphs/nngraphs/sphere.py @@ -10,12 +10,12 @@ class Sphere(NNGraph): Parameters ---------- - radius : float - Radius of the sphere (default = 1) nb_pts : int Number of vertices (default = 300) nb_dim : int Dimension (default = 3) + diameter : float + Diameter of the sphere (default = 2) sampling : string Variance of the distance kernel (default = 'random') (Can now only be 'random') @@ -35,14 +35,14 @@ class Sphere(NNGraph): """ def __init__(self, - radius=1, nb_pts=300, nb_dim=3, + diameter=2, sampling='random', seed=None, **kwargs): - self.radius = radius + self.diameter = diameter self.nb_pts = nb_pts self.nb_dim = nb_dim self.sampling = sampling @@ -55,6 +55,7 @@ def __init__(self, for i in range(self.nb_pts): pts[i] /= np.linalg.norm(pts[i]) + pts[i] *= (diameter / 2) else: @@ -64,15 +65,83 @@ def __init__(self, 'vertex_size': 80, } - super(Sphere, self).__init__(Xin=pts, k=10, - center=False, rescale=False, - plotting=plotting, **kwargs) + super(Sphere, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): - attrs = {'radius': '{:.2f}'.format(self.radius), + attrs = {'diameter': '{:.2e}'.format(self.diameter), 'nb_pts': self.nb_pts, 'nb_dim': self.nb_dim, 'sampling': self.sampling, 'seed': self.seed} attrs.update(super(Sphere, self)._get_extra_repr()) return attrs + + +class SphereOptimalDimensionality(NNGraph): + r"""Spherical-shaped graph using the optimal dimensionality sampling scheme (NN-graph). + + Parameters + ---------- + bandwidth : int + Resolution of the sampling scheme, corresponding to the number of latitude rings (default = 64) + distance_type : {'euclidean', 'geodesic'} + Type of distance used to compute edge weights (default = 'euclidean') + + See Also + -------- + SphereEquiangular, SphereHealpix, SphereIcosahedron + + Notes + ----- + The optimal dimensionality [1]_ sampling scheme consists of equispaced latitude rings. + The number of longitude pixels differs for each ring, and corresponds to the number of spherical harmonics for each mode. + The total number of pixels is then only :math:`(2\,\mathtt{bandwidth})^2`. + + References + ---------- + [1] Z. Khalid, R. A. Kennedy, and J. D. McEwen, "An Optimal-Dimensionality Sampling Scheme on the Sphere with Fast Spherical Harmonic Transforms," IEEE Transactions on Signal Processing, vol. 62, no. 17, pp. 4597-4610, Sept. 2014.
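As a sanity check of the pixel count given in the notes above, a small sketch (assuming, as in the constructor below, ``2 * bandwidth`` iso-latitude rings with ``2 * i + 1`` longitudinal samples on ring ``i``)::

    bandwidth = 8
    samples_per_ring = [2 * i + 1 for i in range(2 * bandwidth)]
    total = sum(samples_per_ring)        # sum of the first 2B odd numbers
    assert total == (2 * bandwidth)**2   # matches the 4*bandwidth**2 allocation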
+ + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> G = graphs.SphereOptimalDimensionality(bandwidth=8) + >>> fig = plt.figure() + >>> ax1 = fig.add_subplot(121) + >>> ax2 = fig.add_subplot(122, projection='3d') + >>> _ = ax1.spy(G.W, markersize=1.5) + >>> _ = G.plot(ax=ax2) + + """ + def __init__(self, bandwidth=64, distance_type='euclidean', **kwargs): + self.bandwidth = bandwidth + if distance_type not in ['geodesic', 'euclidean']: + raise ValueError('Unknown distance type value: ' + distance_type) + + # Sampling and coordinates calculation. + theta, phi = np.zeros(4*bandwidth**2), np.zeros(4*bandwidth**2) + index = 0 + beta = np.pi * ((np.arange(2 * bandwidth + 1) % 2)*(4*bandwidth-1) + np.arange(2 * bandwidth + 1) * (-1)**(np.arange(2 * bandwidth + 1) % 2)) / (4 * bandwidth - 1) + for i in range(2*bandwidth): + alpha = 2 * np.pi * np.arange(2 * i + 1) / (2 * i + 1) + end = len(alpha) + theta[index:index+end], phi[index:index+end] = np.repeat(beta[i], end), alpha + index += end + self.lat, self.lon = theta-np.pi/2, phi + self.bwlat, self.bwlon = theta.shape[0], phi.shape[0] # flattened sampling: one entry per pixel + ct = np.cos(theta).flatten() + st = np.sin(theta).flatten() + cp = np.cos(phi).flatten() + sp = np.sin(phi).flatten() + x = st * cp + y = st * sp + z = ct + coords = np.vstack([x, y, z]).T + coords = np.asarray(coords, dtype=np.float32) + self.npix = len(coords) + + plotting = {"limits": np.array([-1, 1, -1, 1, -1, 1])} + super(SphereOptimalDimensionality, self).__init__(coords, k=4, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/spherehealpix.py b/pygsp/graphs/nngraphs/spherehealpix.py new file mode 100644 index 00000000..67ecb542 --- /dev/null +++ b/pygsp/graphs/nngraphs/spherehealpix.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from pygsp.graphs import NNGraph # prevent circular import in Python < 3.5 + +def _import_hp(): + try: + import healpy as hp + except Exception as e: + raise ImportError('Cannot import healpy. Choose another graph ' + 'or try to install it with ' + 'conda install healpy. ' + 'Original exception: {}'.format(e)) + return hp + + +class SphereHealpix(NNGraph): + r"""Spherical-shaped graph using the HEALPix sampling scheme (NN-graph). + + Parameters + ---------- + nside : int + Resolution of the sampling scheme. It should be a power of 2 (default = 32) + nest : bool + Ordering of the pixels (default = True) + + See Also + -------- + SphereEquiangular, SphereIcosahedron + + Notes + ----- + This graph is based on the HEALPix [1]_ sampling scheme, mainly used by cosmologists. + A heat kernel distance is used to compute the weight matrix. + + References + ---------- + [1] K. M. Gorski et al., "HEALPix -- a Framework for High Resolution Discretization and Fast Analysis of Data Distributed on the Sphere," ApJ, vol. 622, no. 2, pp. 759-771, Apr. 2005.
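For reference, the pixel coordinates used by this constructor can be obtained directly from healpy; a minimal sketch (assuming healpy is installed)::

    import numpy as np
    import healpy as hp

    nside = 4                             # resolution, a power of 2
    npix = hp.nside2npix(nside)           # 12 * nside**2 pixels
    x, y, z = hp.pix2vec(nside, np.arange(npix), nest=True)
    coords = np.stack([x, y, z], axis=1)  # unit vectors on the sphere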
+ + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> G = graphs.SphereHealpix(nside=4) + >>> fig = plt.figure() + >>> ax1 = fig.add_subplot(121) + >>> ax2 = fig.add_subplot(122, projection='3d') + >>> _ = ax1.spy(G.W, markersize=1.5) + >>> _ = G.plot(ax=ax2) + + """ + + def __init__(self, indexes=None, nside=32, nest=True, kernel_width=None, n_neighbors=50, **kwargs): + hp = _import_hp() + self.nside = nside + self.nest = nest + npix = hp.nside2npix(nside) + if indexes is None: + indexes = np.arange(npix) + x, y, z = hp.pix2vec(nside, indexes, nest=nest) + self.lat, self.lon = hp.pix2ang(nside, indexes, nest=nest, lonlat=False) + coords = np.vstack([x, y, z]).transpose() + coords = np.asarray(coords, dtype=np.float32) + if n_neighbors is None: + n_neighbors = 6 if nside == 1 else 8 + + # TODO: find best interpolator between nside and n_neighbors. + self.opt_std = dict() + self.opt_std[20] = { + 1: 0.03185 * 32, # extrapolated + 2: 0.03185 * 16, # extrapolated + 4: 0.03185 * 8, # extrapolated + 8: 0.03185 * 4, # extrapolated + 16: 0.03185 * 2, # extrapolated + 32: 0.03185, + 64: 0.01564, + 128: 0.00782, + 256: 0.00391, + 512: 0.00196, + 1024: 0.00098, + 2048: 0.00098 / 2, # extrapolated + } + self.opt_std[40] = { + 1: 0.042432 * 32, # extrapolated + 2: 0.042432 * 16, # extrapolated + 4: 0.042432 * 8, # extrapolated + 8: 0.042432 * 4, # extrapolated + 16: 0.042432 * 2, # extrapolated + 32: 0.042432, + 64: 0.021354, + 128: 0.010595, + 256: 0.005551, # seems a bit off + #512: 0.003028, # seems buggy + 512: 0.005551 / 2, # extrapolated + 1024: 0.005551 / 4, # extrapolated + 2048: 0.005551 / 8, # extrapolated + } + self.opt_std[60] = { + 1: 0.051720 * 32, # extrapolated + 2: 0.051720 * 16, # extrapolated + 4: 0.051720 * 8, # extrapolated + 8: 0.051720 * 4, # extrapolated + 16: 0.051720 * 2, # extrapolated + 32: 0.051720, + 64: 0.025403, + 128: 0.012695, + 256: 0.006351, + #512: 0.002493, # seems buggy + 512: 0.006351 / 2, # extrapolated + 1024: 0.006351 / 4, # extrapolated + 2048: 0.006351 / 8, # extrapolated + } + self.opt_std[8] = { + 1: 0.02500 * 32, # extrapolated + 2: 0.02500 * 16, # extrapolated + 4: 0.02500 * 8, # extrapolated + 8: 0.02500 * 4, # extrapolated + 16: 0.02500 * 2, # extrapolated + 32: 0.02500, + 64: 0.01228, + 128: 0.00614, + 256: 0.00307, + 512: 0.00154, + 1024: 0.00077, + 2048: 0.00077 / 2, # extrapolated + } + if kernel_width is None: + try: + kernel_dict = self.opt_std[n_neighbors] + except KeyError: + raise ValueError('No sigma for number of neighbors {}'.format(n_neighbors)) + try: + kernel_width = kernel_dict[nside] + except KeyError: + raise ValueError('Unknown sigma for nside {}'.format(nside)) + ## TODO: check std + + ## TODO: n_neighbors in function of nside + if len(indexes) <= n_neighbors: + n_neighbors = len(indexes)-1 + + plotting = { + 'vertex_size': 80, + "limits": np.array([-1, 1, -1, 1, -1, 1]) + } + super(SphereHealpix, self).__init__(features=coords, k=n_neighbors, + kernel_width=kernel_width, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/sphereicosahedron.py b/pygsp/graphs/nngraphs/sphereicosahedron.py new file mode 100644 index 00000000..403725cd --- /dev/null +++ b/pygsp/graphs/nngraphs/sphereicosahedron.py @@ -0,0 +1,717 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from pygsp.graphs import NNGraph # prevent circular import in Python < 3.5 + + + +class SphereIcosahedron(NNGraph): + r"""Spherical-shaped graph based on the projection of the icosahedron (NN-graph).
+ Code inspired by Max Jiang [https://github.com/maxjiang93/ugscnn/blob/master/meshcnn/mesh.py] + + Parameters + ---------- + level : int + Resolution of the sampling scheme, or how many times the faces are divided (default = 5) + sampling : string + What the pixels represent. Either a vertex or a face (default = 'vertex') + + See Also + -------- + SphereDodecahedron, SphereHealpix, SphereEquiangular + + Notes + ----- + The icosahedron is the dual of the dodecahedron. Thus the pixels in this graph represent either the vertices of the icosahedron, or the faces of the dodecahedron. + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> from mpl_toolkits.mplot3d import Axes3D + >>> G = graphs.SphereIcosahedron(level=1) + >>> fig = plt.figure() + >>> ax1 = fig.add_subplot(121) + >>> ax2 = fig.add_subplot(122, projection='3d') + >>> _ = ax1.spy(G.W, markersize=1.5) + >>> _ = G.plot(ax=ax2) + + """ + def __init__(self, level=5, sampling='vertex', **kwargs): + + if sampling not in ['vertex', 'face']: + raise ValueError('Unknown sampling value: ' + sampling) + PHI = (1 + np.sqrt(5))/2 + radius = np.sqrt(PHI**2+1) + coords = [-1, PHI, 0, 1, PHI, 0, -1, -PHI, 0, 1, -PHI, 0, + 0, -1, PHI, 0, 1, PHI, 0, -1, -PHI, 0, 1, -PHI, + PHI, 0, -1, PHI, 0, 1, -PHI, 0, -1, -PHI, 0, 1] + coords = np.reshape(coords, (-1,3)) + coords = coords/radius + faces = [0, 11, 5, 0, 5, 1, 0, 1, 7, 0, 7, 10, 0, 10, 11, + 1, 5, 9, 5, 11, 4, 11, 10, 2, 10, 7, 6, 7, 1, 8, + 3, 9, 4, 3, 4, 2, 3, 2, 6, 3, 6, 8, 3, 8, 9, + 4, 9, 5, 2, 4, 11, 6, 2, 10, 8, 6, 7, 9, 8, 1] + self.faces = np.reshape(faces, (20, 3)) + self.level = level + self.intp = None + + coords = self._upward(coords, self.faces) + self.coords = coords + + for i in range(level): + self.divide() + self.normalize() + + if sampling=='face': + self.coords = self.coords[self.faces].mean(axis=1) + + self.lat, self.lon = self.xyz2latlong() + + self.npix = len(self.coords) + self.nf = 20 * 4**self.level + self.ne = 30 * 4**self.level + self.nv = self.ne - self.nf + 2 + self.nv_prev = int((self.ne / 4) - (self.nf / 4) + 2) + self.nv_next = int((self.ne * 4) - (self.nf * 4) + 2) + + plotting = { + 'vertex_size': 80, + "limits": np.array([-1, 1, -1, 1, -1, 1]) + } + + # TODO: change kind to 'radius' and add a radius parameter (k would then be ignored). + neighbours = 3 if 'face' in sampling else (5 if level == 0 else 6) + super(SphereIcosahedron, self).__init__(self.coords, k=neighbours, plotting=plotting, **kwargs) + + def divide(self): + """ + Subdivide a mesh into smaller triangles.
+ """ + faces = self.faces + vertices = self.coords + face_index = np.arange(len(faces)) + + # the (c,3) int set of vertex indices + faces = faces[face_index] + # the (c, 3, 3) float set of points in the triangles + triangles = vertices[faces] + # the 3 midpoints of each triangle edge vstacked to a (3*c, 3) float + src_idx = np.vstack([faces[:, g] for g in [[0, 1], [1, 2], [2, 0]]]) + mid = np.vstack([triangles[:, g, :].mean(axis=1) for g in [[0, 1], + [1, 2], + [2, 0]]]) + mid_idx = (np.arange(len(face_index) * 3)).reshape((3, -1)).T + # for adjacent faces we are going to be generating the same midpoint + # twice, so we handle it here by finding the unique vertices + unique, inverse = self._unique_rows(mid) + + mid = mid[unique] + src_idx = src_idx[unique] + mid_idx = inverse[mid_idx] + len(vertices) + # the new faces, with correct winding + f = np.column_stack([faces[:, 0], mid_idx[:, 0], mid_idx[:, 2], + mid_idx[:, 0], faces[:, 1], mid_idx[:, 1], + mid_idx[:, 2], mid_idx[:, 1], faces[:, 2], + mid_idx[:, 0], mid_idx[:, 1], mid_idx[:, 2], ]).reshape((-1, 3)) + # add the 3 new faces per old face + new_faces = np.vstack((faces, f[len(face_index):])) + # replace the old face with a smaller face + new_faces[face_index] = f[:len(face_index)] + + new_vertices = np.vstack((vertices, mid)) + # source ids + nv = vertices.shape[0] + identity_map = np.stack((np.arange(nv), np.arange(nv)), axis=1) + src_id = np.concatenate((identity_map, src_idx), axis=0) + + self.coords = new_vertices + self.faces = new_faces + self.intp = src_id + + def normalize(self, radius=1): + ''' + Reproject to spherical surface + ''' + vectors = self.coords + scalar = (vectors ** 2).sum(axis=1)**.5 + unit = vectors / scalar.reshape((-1, 1)) + offset = radius - scalar + self.coords += unit * offset.reshape((-1, 1)) + + def xyz2latlong(self): + x, y, z = self.coords[:, 0], self.coords[:, 1], self.coords[:, 2] + long = np.arctan2(y, x) + np.pi + xy2 = x**2 + y**2 + lat = np.arctan2(z, np.sqrt(xy2)) + return lat, long + + def _upward(self, V_ico, F_ico, ind=11): + V0 = V_ico[ind] + Z0 = np.array([0, 0, 1]) + k = np.cross(V0, Z0) + ct = np.dot(V0, Z0) + st = -np.linalg.norm(k) + R = self._rot_matrix(k, ct, st) + V_ico = V_ico.dot(R) + # rotate a neighbor to align with (+y) + ni = self._find_neighbor(F_ico, ind)[0] + vec = V_ico[ni].copy() + vec[2] = 0 + vec = vec/np.linalg.norm(vec) + y_ = np.eye(3)[1] + + k = np.eye(3)[2] + crs = np.cross(vec, y_) + ct = -np.dot(vec, y_) + st = -np.sign(crs[-1])*np.linalg.norm(crs) + R2 = self._rot_matrix(k, ct, st) + V_ico = V_ico.dot(R2) + return V_ico + + def _find_neighbor(self, F, ind): + """find a icosahedron neighbor of vertex i""" + FF = [F[i] for i in range(F.shape[0]) if ind in F[i]] + FF = np.concatenate(FF) + FF = np.unique(FF) + neigh = [f for f in FF if f != ind] + return neigh + + def _rot_matrix(self, rot_axis, cos_t, sin_t): + k = rot_axis / np.linalg.norm(rot_axis) + I = np.eye(3) + + R = [] + for i in range(3): + v = I[i] + vr = v*cos_t+np.cross(k, v)*sin_t+k*(k.dot(v))*(1-cos_t) + R.append(vr) + R = np.stack(R, axis=-1) + return R + + def _ico_rot_matrix(self, ind): + """ + return rotation matrix to perform permutation corresponding to + moving a certain icosahedron node to the top + """ + v0_ = self.v0.copy() + f0_ = self.f0.copy() + V0 = v0_[ind] + Z0 = np.array([0, 0, 1]) + + # rotate the point to the top (+z) + k = np.cross(V0, Z0) + ct = np.dot(V0, Z0) + st = -np.linalg.norm(k) + R = self._rot_matrix(k, ct, st) + v0_ = v0_.dot(R) + + # rotate a neighbor to align with 
(+y) + ni = self._find_neighbor(f0_, ind)[0] + vec = v0_[ni].copy() + vec[2] = 0 + vec = vec/np.linalg.norm(vec) + y_ = np.eye(3)[1] + + k = np.eye(3)[2] + crs = np.cross(vec, y_) + ct = np.dot(vec, y_) + st = -np.sign(crs[-1])*np.linalg.norm(crs) + + R2 = self._rot_matrix(k, ct, st) + return R.dot(R2) + + def _rotseq(self, V, acc=9): + """sequence to move an original node on icosahedron to top""" + seq = [] + for i in range(11): + Vr = V.dot(self._ico_rot_matrix(i)) + # lexsort + s1 = np.lexsort(np.round(V.T, acc)) + s2 = np.lexsort(np.round(Vr.T, acc)) + s = s1[np.argsort(s2)] + seq.append(s) + return tuple(seq) + + def _unique_rows(self, data, digits=None): + """ + Returns indices of unique rows. It will return the + first occurrence of a row that is duplicated: + [[1,2], [3,4], [1,2]] will return [0,1] + Parameters + --------- + data: (n,m) set of floating point data + digits: how many digits to consider for the purposes of uniqueness + Returns + -------- + unique: (j) array, index in data which is a unique row + inverse: (n) length array to reconstruct original + example: unique[inverse] == data + """ + hashes = self._hashable_rows(data, digits=digits) + garbage, unique, inverse = np.unique(hashes, + return_index=True, + return_inverse=True) + return unique, inverse + + def _hashable_rows(self, data, digits=None): + """ + We turn our array into integers based on the precision + given by digits and then put them in a hashable format. + Parameters + --------- + data: (n,m) input array + digits: how many digits to add to hash, if data is floating point + If none, TOL_MERGE will be turned into a digit count and used. + Returns + --------- + hashable: (n) length array of custom data which can be sorted + or used as hash keys + """ + # if there is no data return immediatly + if len(data) == 0: + return np.array([]) + + # get array as integer to precision we care about + as_int = self._float_to_int(data, digits=digits) + + # if it is flat integers already, return + if len(as_int.shape) == 1: + return as_int + + # if array is 2D and smallish, we can try bitbanging + # this is signifigantly faster than the custom dtype + if len(as_int.shape) == 2 and as_int.shape[1] <= 4: + # time for some righteous bitbanging + # can we pack the whole row into a single 64 bit integer + precision = int(np.floor(64 / as_int.shape[1])) + # if the max value is less than precision we can do this + if np.abs(as_int).max() < 2**(precision - 1): + # the resulting package + hashable = np.zeros(len(as_int), dtype=np.int64) + # loop through each column and bitwise xor to combine + # make sure as_int is int64 otherwise bit offset won't work + for offset, column in enumerate(as_int.astype(np.int64).T): + # will modify hashable in place + np.bitwise_xor(hashable, + column << (offset * precision), + out=hashable) + return hashable + + # reshape array into magical data type that is weird but hashable + dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1])) + # make sure result is contiguous and flat + hashable = np.ascontiguousarray(as_int).view(dtype).reshape(-1) + return hashable + + def _float_to_int(self, data, digits=None, dtype=np.int32): + """ + Given a numpy array of float/bool/int, return as integers. 
+ Parameters + ------------- + data: (n, d) float, int, or bool data + digits: float/int precision for float conversion + dtype: numpy dtype for result + Returns + ------------- + as_int: data, as integers + """ + # convert to any numpy array + data = np.asanyarray(data) + + # if data is already an integer or boolean we're done + # if the data is empty we are also done + if data.dtype.kind in 'ib' or data.size == 0: + return data.astype(dtype) + + # populate digits from kwargs + if digits is None: + digits = self._decimal_to_digits(1e-8) + elif isinstance(digits, float) or isinstance(digits, np.float): + digits = self._decimal_to_digits(digits) + elif not (isinstance(digits, int) or isinstance(digits, np.integer)): + # log.warn('Digits were passed as %s!', digits.__class__.__name__) + raise ValueError('Digits must be None, int, or float!') + + # data is float so convert to large integers + data_max = np.abs(data).max() * 10**digits + # ignore passed dtype if we have something large + dtype = [np.int32, np.int64][int(data_max > 2**31)] + # multiply by requested power of ten + # then subtract small epsilon to avoid "go either way" rounding + # then do the rounding and convert to integer + as_int = np.round((data * 10 ** digits) - 1e-6).astype(dtype) + + return as_int + + + def _decimal_to_digits(self, decimal, min_digits=None): + """ + Return the number of digits to the first nonzero decimal. + Parameters + ----------- + decimal: float + min_digits: int, minumum number of digits to return + Returns + ----------- + digits: int, number of digits to the first nonzero decimal + """ + digits = abs(int(np.log10(decimal))) + if min_digits is not None: + digits = np.clip(digits, min_digits, 20) + return digits + + +class SphereDodecahedron(NNGraph): + r"""Spherical-shaped graph based on the projection of the dodecahedron (NN-graph). + Code inspired by Max Jiang [https://github.com/maxjiang93/ugscnn/blob/master/meshcnn/mesh.py] + + Parameters + ---------- + level : int + Resolution of the sampling scheme, or how many times the faces are divided (default = 5) + sampling : string + What the pixels represent. Either a vertex or a face (default = 'vertex') + + See Also + -------- + SphereIcosahedron, SphereHealpix, SphereEquiangular + + Notes + ------ + The dodecahedron is the dual of the icosahedron. Thus the pixels in this graph represent either the vertices \ + of the dodecahedron, or the faces of the icosahedron. 
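The vertex, edge, and face counts used by both constructors follow from the subdivision: each level quadruples the number of faces and edges, and the vertex count then follows from Euler's formula. A short sketch of that bookkeeping::

    level = 2
    nf = 20 * 4**level   # faces of the subdivided icosahedron
    ne = 30 * 4**level   # edges
    nv = ne - nf + 2     # vertices, from Euler's formula V - E + F = 2
    assert nv - ne + nf == 2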
+ + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> from mpl_toolkits.mplot3d import Axes3D + >>> G = graphs.SphereDodecahedron(level=1) + >>> fig = plt.figure() + >>> ax1 = fig.add_subplot(121) + >>> ax2 = fig.add_subplot(122, projection='3d') + >>> _ = ax1.spy(G.W, markersize=1.5) + >>> _ = G.plot(ax=ax2) + + """ + def __init__(self, level=5, **kwargs): + + PHI = (1 + np.sqrt(5))/2 + radius = np.sqrt(PHI**2+1) + coords = [-1, PHI, 0, 1, PHI, 0, -1, -PHI, 0, 1, -PHI, 0, + 0, -1, PHI, 0, 1, PHI, 0, -1, -PHI, 0, 1, -PHI, + PHI, 0, -1, PHI, 0, 1, -PHI, 0, -1, -PHI, 0, 1] + coords = np.reshape(coords, (-1,3)) + coords = coords/radius + faces = [0, 11, 5, 0, 5, 1, 0, 1, 7, 0, 7, 10, 0, 10, 11, + 1, 5, 9, 5, 11, 4, 11, 10, 2, 10, 7, 6, 7, 1, 8, + 3, 9, 4, 3, 4, 2, 3, 2, 6, 3, 6, 8, 3, 8, 9, + 4, 9, 5, 2, 4, 11, 6, 2, 10, 8, 6, 7, 9, 8, 1] + self.faces = np.reshape(faces, (20, 3)) + self.level = level + self.intp = None + + coords = self._upward(coords, self.faces) + self.coords = coords + + for i in range(level): + self.divide() + self.normalize() + + self.coords = self.coords[self.faces].mean(axis=1) + + self.lat, self.lon = self.xyz2latlong() + + self.npix = len(self.coords) + self.nf = 20 * 4**self.level + self.ne = 30 * 4**self.level + self.nv = self.ne - self.nf + 2 + self.nv_prev = int((self.ne / 4) - (self.nf / 4) + 2) + self.nv_next = int((self.ne * 4) - (self.nf * 4) + 2) + + plotting = { + 'vertex_size': 80, + "limits": np.array([-1, 1, -1, 1, -1, 1]) + } + + # TODO: change kind to 'radius' and add a radius parameter (k would then be ignored). + neighbours = 3 + super(SphereDodecahedron, self).__init__(self.coords, k=neighbours, plotting=plotting, **kwargs) + + def divide(self): + """ + Subdivide a mesh into smaller triangles. + """ + faces = self.faces + vertices = self.coords + face_index = np.arange(len(faces)) + + # the (c,3) int set of vertex indices + faces = faces[face_index] + # the (c, 3, 3) float set of points in the triangles + triangles = vertices[faces] + # the 3 midpoints of each triangle edge vstacked to a (3*c, 3) float + src_idx = np.vstack([faces[:, g] for g in [[0, 1], [1, 2], [2, 0]]]) + mid = np.vstack([triangles[:, g, :].mean(axis=1) for g in [[0, 1], + [1, 2], + [2, 0]]]) + mid_idx = (np.arange(len(face_index) * 3)).reshape((3, -1)).T + # for adjacent faces we are going to be generating the same midpoint + # twice, so we handle it here by finding the unique vertices + unique, inverse = self._unique_rows(mid) + + mid = mid[unique] + src_idx = src_idx[unique] + mid_idx = inverse[mid_idx] + len(vertices) + # the new faces, with correct winding + f = np.column_stack([faces[:, 0], mid_idx[:, 0], mid_idx[:, 2], + mid_idx[:, 0], faces[:, 1], mid_idx[:, 1], + mid_idx[:, 2], mid_idx[:, 1], faces[:, 2], + mid_idx[:, 0], mid_idx[:, 1], mid_idx[:, 2], ]).reshape((-1, 3)) + # add the 3 new faces per old face + new_faces = np.vstack((faces, f[len(face_index):])) + # replace the old face with a smaller face + new_faces[face_index] = f[:len(face_index)] + + new_vertices = np.vstack((vertices, mid)) + # source ids + nv = vertices.shape[0] + identity_map = np.stack((np.arange(nv), np.arange(nv)), axis=1) + src_id = np.concatenate((identity_map, src_idx), axis=0) + + self.coords = new_vertices + self.faces = new_faces + self.intp = src_id + + def normalize(self, radius=1): + ''' + Reproject to spherical surface + ''' + vectors = self.coords + scalar = (vectors ** 2).sum(axis=1)**.5 + unit = vectors / scalar.reshape((-1, 1)) + offset = radius - scalar + self.coords += unit *
offset.reshape((-1, 1)) + + def xyz2latlong(self): + x, y, z = self.coords[:, 0], self.coords[:, 1], self.coords[:, 2] + long = np.arctan2(y, x) + np.pi + xy2 = x**2 + y**2 + lat = np.arctan2(z, np.sqrt(xy2)) + return lat, long + + def _upward(self, V_ico, F_ico, ind=11): + V0 = V_ico[ind] + Z0 = np.array([0, 0, 1]) + k = np.cross(V0, Z0) + ct = np.dot(V0, Z0) + st = -np.linalg.norm(k) + R = self._rot_matrix(k, ct, st) + V_ico = V_ico.dot(R) + # rotate a neighbor to align with (+y) + ni = self._find_neighbor(F_ico, ind)[0] + vec = V_ico[ni].copy() + vec[2] = 0 + vec = vec/np.linalg.norm(vec) + y_ = np.eye(3)[1] + + k = np.eye(3)[2] + crs = np.cross(vec, y_) + ct = -np.dot(vec, y_) + st = -np.sign(crs[-1])*np.linalg.norm(crs) + R2 = self._rot_matrix(k, ct, st) + V_ico = V_ico.dot(R2) + return V_ico + + def _find_neighbor(self, F, ind): + """find a icosahedron neighbor of vertex i""" + FF = [F[i] for i in range(F.shape[0]) if ind in F[i]] + FF = np.concatenate(FF) + FF = np.unique(FF) + neigh = [f for f in FF if f != ind] + return neigh + + def _rot_matrix(self, rot_axis, cos_t, sin_t): + k = rot_axis / np.linalg.norm(rot_axis) + I = np.eye(3) + + R = [] + for i in range(3): + v = I[i] + vr = v*cos_t+np.cross(k, v)*sin_t+k*(k.dot(v))*(1-cos_t) + R.append(vr) + R = np.stack(R, axis=-1) + return R + + def _ico_rot_matrix(self, ind): + """ + return rotation matrix to perform permutation corresponding to + moving a certain icosahedron node to the top + """ + v0_ = self.v0.copy() + f0_ = self.f0.copy() + V0 = v0_[ind] + Z0 = np.array([0, 0, 1]) + + # rotate the point to the top (+z) + k = np.cross(V0, Z0) + ct = np.dot(V0, Z0) + st = -np.linalg.norm(k) + R = self._rot_matrix(k, ct, st) + v0_ = v0_.dot(R) + + # rotate a neighbor to align with (+y) + ni = self._find_neighbor(f0_, ind)[0] + vec = v0_[ni].copy() + vec[2] = 0 + vec = vec/np.linalg.norm(vec) + y_ = np.eye(3)[1] + + k = np.eye(3)[2] + crs = np.cross(vec, y_) + ct = np.dot(vec, y_) + st = -np.sign(crs[-1])*np.linalg.norm(crs) + + R2 = self._rot_matrix(k, ct, st) + return R.dot(R2) + + def _rotseq(self, V, acc=9): + """sequence to move an original node on icosahedron to top""" + seq = [] + for i in range(11): + Vr = V.dot(self._ico_rot_matrix(i)) + # lexsort + s1 = np.lexsort(np.round(V.T, acc)) + s2 = np.lexsort(np.round(Vr.T, acc)) + s = s1[np.argsort(s2)] + seq.append(s) + return tuple(seq) + + def _unique_rows(self, data, digits=None): + """ + Returns indices of unique rows. It will return the + first occurrence of a row that is duplicated: + [[1,2], [3,4], [1,2]] will return [0,1] + Parameters + --------- + data: (n,m) set of floating point data + digits: how many digits to consider for the purposes of uniqueness + Returns + -------- + unique: (j) array, index in data which is a unique row + inverse: (n) length array to reconstruct original + example: unique[inverse] == data + """ + hashes = self._hashable_rows(data, digits=digits) + garbage, unique, inverse = np.unique(hashes, + return_index=True, + return_inverse=True) + return unique, inverse + + def _hashable_rows(self, data, digits=None): + """ + We turn our array into integers based on the precision + given by digits and then put them in a hashable format. + Parameters + --------- + data: (n,m) input array + digits: how many digits to add to hash, if data is floating point + If none, TOL_MERGE will be turned into a digit count and used. 
+
+        Returns
+        -------
+        hashable: (n) length array of custom data which can be sorted
+            or used as hash keys
+        """
+        # if there is no data return immediately
+        if len(data) == 0:
+            return np.array([])
+
+        # get array as integer to precision we care about
+        as_int = self._float_to_int(data, digits=digits)
+
+        # if it is flat integers already, return
+        if len(as_int.shape) == 1:
+            return as_int
+
+        # if array is 2D and smallish, we can try bitbanging
+        # this is significantly faster than the custom dtype
+        if len(as_int.shape) == 2 and as_int.shape[1] <= 4:
+            # time for some righteous bitbanging
+            # can we pack the whole row into a single 64 bit integer
+            precision = int(np.floor(64 / as_int.shape[1]))
+            # if every value fits into its allotted bits, we can do this
+            if np.abs(as_int).max() < 2**(precision - 1):
+                # the resulting package
+                hashable = np.zeros(len(as_int), dtype=np.int64)
+                # loop through each column and bitwise xor to combine
+                # make sure as_int is int64 otherwise bit offset won't work
+                for offset, column in enumerate(as_int.astype(np.int64).T):
+                    # will modify hashable in place
+                    np.bitwise_xor(hashable,
+                                   column << (offset * precision),
+                                   out=hashable)
+                return hashable
+
+        # reshape array into magical data type that is weird but hashable
+        dtype = np.dtype((np.void, as_int.dtype.itemsize * as_int.shape[1]))
+        # make sure result is contiguous and flat
+        hashable = np.ascontiguousarray(as_int).view(dtype).reshape(-1)
+        return hashable
+
+    def _float_to_int(self, data, digits=None, dtype=np.int32):
+        """
+        Given a numpy array of float/bool/int, return as integers.
+
+        Parameters
+        ----------
+        data: (n, d) float, int, or bool data
+        digits: float/int precision for float conversion
+        dtype: numpy dtype for result
+
+        Returns
+        -------
+        as_int: data, as integers
+        """
+        # convert to any numpy array
+        data = np.asanyarray(data)
+
+        # if data is already an integer or boolean we're done
+        # if the data is empty we are also done
+        if data.dtype.kind in 'ib' or data.size == 0:
+            return data.astype(dtype)
+
+        # populate digits from kwargs
+        if digits is None:
+            digits = self._decimal_to_digits(1e-8)
+        elif isinstance(digits, (float, np.floating)):
+            digits = self._decimal_to_digits(digits)
+        elif not isinstance(digits, (int, np.integer)):
+            # log.warn('Digits were passed as %s!', digits.__class__.__name__)
+            raise ValueError('Digits must be None, int, or float!')
+
+        # data is float so convert to large integers
+        data_max = np.abs(data).max() * 10**digits
+        # ignore passed dtype if we have something large
+        dtype = [np.int32, np.int64][int(data_max > 2**31)]
+        # multiply by requested power of ten
+        # then subtract small epsilon to avoid "go either way" rounding
+        # then do the rounding and convert to integer
+        as_int = np.round((data * 10 ** digits) - 1e-6).astype(dtype)
+
+        return as_int
+
+    def _decimal_to_digits(self, decimal, min_digits=None):
+        """
+        Return the number of digits to the first nonzero decimal.
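+
+        For example, the default tolerance of 1e-8 yields 8 digits,
+        since abs(int(np.log10(1e-8))) == 8.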
+
+        Parameters
+        ----------
+        decimal: float
+        min_digits: int, minimum number of digits to return
+
+        Returns
+        -------
+        digits: int, number of digits to the first nonzero decimal
+        """
+        digits = abs(int(np.log10(decimal)))
+        if min_digits is not None:
+            digits = np.clip(digits, min_digits, 20)
+        return digits
diff --git a/pygsp/graphs/nngraphs/twomoons.py b/pygsp/graphs/nngraphs/twomoons.py
index af87d0d8..1d7cdde1 100644
--- a/pygsp/graphs/nngraphs/twomoons.py
+++ b/pygsp/graphs/nngraphs/twomoons.py
@@ -16,7 +16,7 @@
         two_moons graph or a synthesized one (default is 'standard').
         'standard' : Create a two_moons graph from a based graph.
         'synthesized' : Create a synthesized two_moon
-    sigmag : float
+    kernel_width : float
         Variance of the distance kernel (default = 0.05)
     dim : int
         The dimensionality of the points (default = 2).
@@ -24,7 +24,7 @@
     N : int
         Number of vertices (default = 2000)
         Only valid for moontype == 'synthesized'.
-    sigmad : float
+    variance : float
         Variance of the data (do not set it too high or you won't see
         anything) (default = 0.05)
         Only valid for moontype == 'synthesized'.
@@ -44,11 +44,11 @@
 
    """
 
-    def _create_arc_moon(self, N, sigmad, distance, number, seed):
+    def _create_arc_moon(self, N, variance, distance, number, seed):
         rs = np.random.RandomState(seed)
         phi = rs.rand(N, 1) * np.pi
         r = 1
-        rb = sigmad * rs.normal(size=(N, 1))
+        rb = variance * rs.normal(size=(N, 1))
         ab = rs.rand(N, 1) * 2 * np.pi
         b = rb * np.exp(1j * ab)
         bx = np.real(b)
@@ -63,29 +63,29 @@
 
         return np.concatenate((moonx, moony), axis=1)
 
-    def __init__(self, moontype='standard', dim=2, sigmag=0.05,
-                 N=400, sigmad=0.07, distance=0.5, seed=None, **kwargs):
+    def __init__(self, moontype='standard', dim=2, kernel_width=0.05,
+                 N=400, variance=0.07, distance=0.5, seed=None, **kwargs):
 
         self.moontype = moontype
         self.dim = dim
-        self.sigmag = sigmag
-        self.sigmad = sigmad
+        self.kernel_width = kernel_width
+        self.variance = variance
         self.distance = distance
         self.seed = seed
 
         if moontype == 'standard':
             N1, N2 = 1000, 1000
             data = utils.loadmat('pointclouds/two_moons')
-            Xin = data['features'][:dim].T
+            coords = data['features'][:dim].T
 
         elif moontype == 'synthesized':
             N1 = N // 2
             N2 = N - N1
 
-            coords1 = self._create_arc_moon(N1, sigmad, distance, 1, seed)
-            coords2 = self._create_arc_moon(N2, sigmad, distance, 2, seed)
+            coords1 = self._create_arc_moon(N1, variance, distance, 1, seed)
+            coords2 = self._create_arc_moon(N2, variance, distance, 2, seed)
 
-            Xin = np.concatenate((coords1, coords2))
+            coords = np.concatenate((coords1, coords2))
 
         else:
             raise ValueError('Unknown moontype {}'.format(moontype))
 
@@ -96,15 +96,14 @@
             'vertex_size': 30,
         }
 
-        super(TwoMoons, self).__init__(Xin=Xin, sigma=sigmag, k=5,
-                                       center=False, rescale=False,
+        super(TwoMoons, self).__init__(coords, kernel_width=kernel_width, k=5,
                                        plotting=plotting, **kwargs)
 
     def _get_extra_repr(self):
         attrs = {'moontype': self.moontype,
                  'dim': self.dim,
-                 'sigmag': '{:.2f}'.format(self.sigmag),
-                 'sigmad': '{:.2f}'.format(self.sigmad),
+                 'kernel_width': '{:.2f}'.format(self.kernel_width),
+                 'variance': '{:.2f}'.format(self.variance),
                  'distance': '{:.2f}'.format(self.distance),
                  'seed': self.seed}
         attrs.update(super(TwoMoons, self)._get_extra_repr())
diff --git a/pygsp/graphs/path.py b/pygsp/graphs/path.py
index 480baa12..94aec2c4 100644
--- 
a/pygsp/graphs/path.py +++ b/pygsp/graphs/path.py @@ -9,11 +9,31 @@ class Path(Graph): r"""Path graph. + A signal on the path graph is akin to a 1-dimensional signal in classical + signal processing. + + On the path graph, the graph Fourier transform (GFT) is the classical + discrete cosine transform (DCT_). + As the type-II DCT, the GFT assumes even boundary conditions on both sides. + + .. _DCT: https://en.wikipedia.org/wiki/Discrete_cosine_transform + Parameters ---------- N : int Number of vertices. + See Also + -------- + Ring : 1D line with periodic boundary conditions + Grid2d : Kronecker product of two path graphs + + References + ---------- + :cite:`strang1999dct` shows that each DCT basis contains the eigenvectors + of a symmetric "second difference" matrix. + They get the eight types of DCTs by varying the boundary conditions. + Examples -------- >>> import matplotlib.pyplot as plt @@ -23,9 +43,17 @@ class Path(Graph): ... _ = axes[i, 0].spy(G.W) ... _ = G.plot(ax=axes[i, 1]) - References - ---------- - See :cite:`strang1999discrete` for more informations. + The GFT of the path graph is the classical DCT. + + >>> from matplotlib import pyplot as plt + >>> n_eigenvectors = 4 + >>> graph = graphs.Path(30) + >>> fig, axes = plt.subplots(1, 2) + >>> graph.set_coordinates('line1D') + >>> graph.compute_fourier_basis() + >>> _ = graph.plot(graph.U[:, :n_eigenvectors], ax=axes[0]) + >>> _ = axes[0].legend(range(n_eigenvectors)) + >>> _ = axes[1].plot(graph.e, '.') """ @@ -44,7 +72,7 @@ def __init__(self, N=16, directed=False, **kwargs): W = sparse.csr_matrix((weights, (sources, targets)), shape=(N, N)) plotting = {"limits": np.array([-1, N, -1, 1])} - super(Path, self).__init__(W=W, plotting=plotting, **kwargs) + super(Path, self).__init__(W, plotting=plotting, **kwargs) self.set_coordinates('line2D') diff --git a/pygsp/graphs/randomregular.py b/pygsp/graphs/randomregular.py index b3c85ea5..49c151bd 100644 --- a/pygsp/graphs/randomregular.py +++ b/pygsp/graphs/randomregular.py @@ -100,7 +100,7 @@ def __init__(self, N=64, k=6, max_iter=10, seed=None, **kwargs): v = sorted([i1, i2]) U = np.concatenate((U[:v[0]], U[v[0] + 1:v[1]], U[v[1] + 1:])) - super(RandomRegular, self).__init__(W=A, **kwargs) + super(RandomRegular, self).__init__(A, **kwargs) self.is_regular() diff --git a/pygsp/graphs/randomring.py b/pygsp/graphs/randomring.py index 6d9de235..1c24a1d5 100644 --- a/pygsp/graphs/randomring.py +++ b/pygsp/graphs/randomring.py @@ -8,12 +8,14 @@ class RandomRing(Graph): - r"""Ring graph with randomly sampled nodes. + r"""Ring graph with randomly sampled vertices. Parameters ---------- N : int Number of vertices. + angles : array_like, optional + The angular coordinate, in :math:`[0, 2\pi]`, of the vertices. seed : int Seed for the random number generator (for reproducible graphs). 
@@ -29,29 +31,50 @@ class RandomRing(Graph):
 
     """
 
-    def __init__(self, N=64, seed=None, **kwargs):
+    def __init__(self, N=64, angles=None, seed=None, **kwargs):
 
         self.seed = seed
 
-        rs = np.random.RandomState(seed)
-        position = np.sort(rs.uniform(size=N), axis=0)
-
-        weight = N * np.diff(position)
-        weight_end = N * (1 + position[0] - position[-1])
-
-        inds_i = np.arange(0, N-1)
-        inds_j = np.arange(1, N)
-
-        W = sparse.csc_matrix((weight, (inds_i, inds_j)), shape=(N, N))
-        W = W.tolil()
-        W[0, N-1] = weight_end
+        if angles is None:
+            rs = np.random.RandomState(seed)
+            angles = np.sort(rs.uniform(0, 2*np.pi, size=N), axis=0)
+        else:
+            angles = np.asanyarray(angles)
+            angles.sort()  # Need to be sorted to take the difference.
+            N = len(angles)
+            if np.any(angles < 0) or np.any(angles >= 2*np.pi):
+                raise ValueError('Angles should be in [0, 2 pi]')
+        self.angles = angles
+
+        if N < 3:
+            # With only two vertices, the two arcs connecting them have
+            # different lengths, which would yield an asymmetric weight matrix.
+            raise ValueError('There should be at least 3 vertices.')
+
+        rows = range(0, N-1)
+        cols = range(1, N)
+        weights = np.diff(angles)
+
+        # Close the loop.
+        rows = np.concatenate((rows, [0]))
+        cols = np.concatenate((cols, [N-1]))
+        weights = np.concatenate((weights, [2*np.pi + angles[0] - angles[-1]]))
+
+        W = sparse.coo_matrix((weights, (rows, cols)), shape=(N, N))
         W = utils.symmetrize(W, method='triu')
 
-        angle = position * 2 * np.pi
-        coords = np.stack([np.cos(angle), np.sin(angle)], axis=1)
+        # The width is the expected angle gap. All gaps are equal to that
+        # value when the ring is uniformly sampled.
+        width = 2 * np.pi / N
+        assert abs(W.data.mean() - width) < 1e-10
+        # TODO: why this kernel? It empirically produces eigenvectors closer
+        # to the sines and cosines.
+        W.data = width / W.data
+
+        coords = np.stack([np.cos(angles), np.sin(angles)], axis=1)
 
         plotting = {'limits': np.array([-1, 1, -1, 1])}
 
-        super(RandomRing, self).__init__(W=W, coords=coords, plotting=plotting,
+        # TODO: save angle and 2D position as graph signals
+        super(RandomRing, self).__init__(W, coords=coords, plotting=plotting,
                                          **kwargs)
 
     def _get_extra_repr(self):
diff --git a/pygsp/graphs/ring.py b/pygsp/graphs/ring.py
index 1c549619..e9da9b4c 100644
--- a/pygsp/graphs/ring.py
+++ b/pygsp/graphs/ring.py
@@ -9,6 +9,17 @@ class Ring(Graph):
 
     r"""K-regular ring graph.
 
+    A signal on the ring graph is akin to a 1-dimensional periodic signal in
+    classical signal processing.
+
+    On the ring graph, the graph Fourier transform (GFT) is the classical
+    discrete Fourier transform (DFT_).
+    Indeed, the Laplacian of the ring graph is a `circulant matrix`_, and any
+    circulant matrix is diagonalized by the DFT.
+
+    .. _DFT: https://en.wikipedia.org/wiki/Discrete_Fourier_transform
+    .. _circulant matrix: https://en.wikipedia.org/wiki/Circulant_matrix
+
     Parameters
     ----------
     N : int
@@ -16,6 +27,11 @@
     k : int
         Number of neighbors in each direction.
 
+    See Also
+    --------
+    Path : 1D line with even boundary conditions
+    Torus : Kronecker product of two ring graphs
+
     Examples
     --------
     >>> import matplotlib.pyplot as plt
@@ -24,12 +40,28 @@
     >>> G = graphs.Ring(N=32, k=16)
     >>> fig, axes = plt.subplots(1, 2)
     >>> _ = axes[0].spy(G.W)
     >>> _ = G.plot(ax=axes[1])
 
+    The GFT of the ring graph is the classical DFT.
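+    As the Laplacian is symmetric, its eigenvectors can moreover be chosen
+    real, i.e., as the cosine and sine pairs of increasing frequency.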
+
+    >>> from matplotlib import pyplot as plt
+    >>> n_eigenvectors = 4
+    >>> graph = graphs.Ring(30)
+    >>> fig, axes = plt.subplots(1, 2)
+    >>> graph.set_coordinates('line1D')
+    >>> graph.compute_fourier_basis()
+    >>> _ = graph.plot(graph.U[:, :n_eigenvectors], ax=axes[0])
+    >>> _ = axes[0].legend(range(n_eigenvectors))
+    >>> _ = axes[1].plot(graph.e, '.')
+
     """
 
     def __init__(self, N=64, k=1, **kwargs):
 
         self.k = k
 
+        if N < 3:
+            # With only two vertices, the two arcs connecting them have
+            # different lengths, which would yield an asymmetric weight matrix.
+            raise ValueError('There should be at least 3 vertices.')
+
         if 2*k > N:
             raise ValueError('Too many neighbors requested.')
 
@@ -57,7 +89,7 @@
 
         plotting = {'limits': np.array([-1, 1, -1, 1])}
 
-        super(Ring, self).__init__(W=W, plotting=plotting, **kwargs)
+        super(Ring, self).__init__(W, plotting=plotting, **kwargs)
 
         self.set_coordinates('ring2D')
 
diff --git a/pygsp/graphs/sphereequiangular.py b/pygsp/graphs/sphereequiangular.py
new file mode 100644
index 00000000..04f6d914
--- /dev/null
+++ b/pygsp/graphs/sphereequiangular.py
@@ -0,0 +1,257 @@
+# -*- coding: utf-8 -*-
+
+import warnings
+import numpy as np
+from scipy import sparse
+
+from pygsp.graphs import Graph  # prevent circular import in Python < 3.5
+
+
+def _import_hp():
+    try:
+        import healpy as hp
+    except Exception as e:
+        raise ImportError('Cannot import healpy. Choose another graph '
+                          'or try to install it with '
+                          'conda install healpy. '
+                          'Original exception: {}'.format(e))
+    return hp
+
+
+class SphereEquiangular(Graph):
+    r"""Spherical-shaped graph using an equirectangular sampling scheme.
+
+    Parameters
+    ----------
+    bandwidth : int or list or tuple
+        Resolution of the sampling scheme, corresponding to the bandwidth
+        (latitude, longitude).
+        Use a list or tuple to have a different resolution for latitude and
+        longitude (default = 64).
+    sampling : {'Driscoll-Healy', 'SOFT', 'Clenshaw-Curtis', 'Gauss-Legendre'}
+        Sampling scheme (default = 'SOFT').
+
+        * Driscoll-Healy is the original sampling scheme of the sphere.
+        * SOFT is an upgraded version without the poles.
+        * Clenshaw-Curtis uses its namesake quadrature to find the position of the latitude rings.
+        * Gauss-Legendre uses its namesake quadrature to find the position of the latitude rings.
+        * Optimal Dimensionality guarantees a minimum number of pixels, different for each latitude ring.
+    distance_type : {'euclidean', 'geodesic'}
+        Type of distance used to compute the edge weights (default = 'euclidean').
+
+    See Also
+    --------
+    SphereHealpix, SphereIcosahedron
+
+    Notes
+    -----
+    Driscoll-Healy is the original sampling scheme of the sphere [1]_.
+    SOFT is an updated sampling scheme, without the poles [2]_.
+    Clenshaw-Curtis is described in [3]_ and Gauss-Legendre in [4]_.
+    The weight matrix is designed following [5]_.
+
+    References
+    ----------
+    .. [1] J. R. Driscoll and D. M. Healy, "Computing Fourier Transforms and
+       Convolutions on the 2-Sphere", Advances in Applied Mathematics,
+       vol. 15, no. 2, pp. 202-250, June 1994.
+    .. [2] D. M. Healy, D. N. Rockmore, P. J. Kostelec, and S. Moore, "FFTs
+       for the 2-Sphere-Improvements and Variations", Journal of Fourier
+       Analysis and Applications, vol. 9, no. 4, pp. 341-385, Jul. 2003.
+    .. [3] D. Hotta and M. Ujiie, "A nestable, multigrid-friendly grid on a
+       sphere for global spectral models based on Clenshaw-Curtis quadrature",
+       Q J R Meteorol Soc, vol. 144, no. 714, pp. 1382-1397, Jul. 2018.
+    .. [4] J. Keiner and D. Potts, "Fast evaluation of quadrature formulae on
+       the sphere", Math. Comp., vol. 77, no. 261, pp. 397-419, Jan. 2008.
+    .. [5] R. Khasanova and P. Frossard, "Graph-Based Classification of
+       Omnidirectional Images", 2017 IEEE International Conference on
+       Computer Vision Workshops (ICCVW), Venice, Italy, 2017, pp. 860-869.
+    .. [6] Z. Khalid, R. A. Kennedy, and J. D. McEwen, "An
+       Optimal-Dimensionality Sampling Scheme on the Sphere with Fast
+       Spherical Harmonic Transforms", IEEE Transactions on Signal
+       Processing, vol. 62, no. 17, pp. 4597-4610, Sept. 2014.
+
+    Examples
+    --------
+    >>> import matplotlib.pyplot as plt
+    >>> G1 = graphs.SphereEquiangular(bandwidth=6, sampling='Driscoll-Healy')
+    >>> G2 = graphs.SphereEquiangular(bandwidth=6, sampling='SOFT')
+    >>> G3 = graphs.SphereEquiangular(bandwidth=6, sampling='Clenshaw-Curtis')
+    >>> G4 = graphs.SphereEquiangular(bandwidth=6, sampling='Gauss-Legendre')
+    >>> fig = plt.figure()
+    >>> plt.subplots_adjust(wspace=1.)
+    >>> ax1 = fig.add_subplot(221, projection='3d')
+    >>> ax2 = fig.add_subplot(222, projection='3d')
+    >>> ax3 = fig.add_subplot(223, projection='3d')
+    >>> ax4 = fig.add_subplot(224, projection='3d')
+    >>> _ = G1.plot(ax=ax1, title='Driscoll-Healy', vertex_size=10)
+    >>> _ = G2.plot(ax=ax2, title='SOFT', vertex_size=10)
+    >>> _ = G3.plot(ax=ax3, title='Clenshaw-Curtis', vertex_size=10)
+    >>> _ = G4.plot(ax=ax4, title='Gauss-Legendre', vertex_size=10)
+    >>> _ = ax1.set_xlim([0, 1])
+    >>> _ = ax1.set_ylim([-1, 0.])
+    >>> _ = ax1.set_zlim([0.5, 1.])
+    >>> _ = ax2.set_xlim([0, 1])
+    >>> _ = ax2.set_ylim([-1, 0.])
+    >>> _ = ax2.set_zlim([0.5, 1.])
+    >>> _ = ax3.set_xlim([0, 1])
+    >>> _ = ax3.set_ylim([-1, 0.])
+    >>> _ = ax3.set_zlim([0.5, 1.])
+    >>> _ = ax4.set_xlim([0, 1])
+    >>> _ = ax4.set_ylim([-1, 0.])
+    >>> _ = ax4.set_zlim([0.5, 1.])
+
+    """
+    # TODO: move OD into a different file, as well as the cylinder.
+    def __init__(self, bandwidth=64, sampling='SOFT', distance_type='euclidean', **kwargs):
+        if isinstance(bandwidth, int):
+            bandwidth = (bandwidth, bandwidth)
+        elif len(bandwidth) > 2:
+            raise ValueError('Cannot have more than two bandwidths')
+        self.bandwidth = bandwidth
+        self.sampling = sampling
+        if sampling not in ['Driscoll-Healy', 'SOFT', 'Clenshaw-Curtis', 'Gauss-Legendre']:
+            raise ValueError('Unknown sampling type: ' + sampling)
+        if distance_type not in ['geodesic', 'euclidean']:
+            raise ValueError('Unknown distance type: ' + distance_type)
+
+        # Sampling and coordinates calculation.
+        if sampling == 'Driscoll-Healy':
+            beta = np.arange(2 * bandwidth[0]) * np.pi / (2. * bandwidth[0])
+            alpha = np.arange(2 * bandwidth[1]) * np.pi / bandwidth[1]
+        elif sampling == 'SOFT':  # SO(3) Fourier Transform optimal
+            beta = np.pi * (2 * np.arange(2 * bandwidth[0]) + 1) / (4. * bandwidth[0])
+            alpha = np.arange(2 * bandwidth[1]) * np.pi / bandwidth[1]
+        elif sampling == 'Clenshaw-Curtis':
+            warnings.warn("The weight matrix may not be optimal for this sampling scheme as it was not tested.", UserWarning)
+            beta = np.linspace(0, np.pi, 2 * bandwidth[0] + 1)
+            alpha = np.linspace(0, 2 * np.pi, 2 * bandwidth[1] + 2, endpoint=False)
+        elif sampling == 'Gauss-Legendre':
+            warnings.warn("The weight matrix may not be optimal for this sampling scheme as it was not tested.", UserWarning)
+            try:
+                from numpy.polynomial.legendre import leggauss
+            except ImportError:
+                raise ImportError('Cannot import the Legendre quadrature from numpy. '
+                                  'Choose another sampling type or upgrade numpy.')
+            # TODO: the leggauss docs state that results have only been tested
+            # up to degree 100; higher degrees may be problematic.
+            quad, _ = leggauss(bandwidth[0] + 1)
+            beta = np.arccos(quad)
+            alpha = np.arange(2 * bandwidth[1] + 2) * np.pi / (bandwidth[1] + 1)
+        theta, phi = np.meshgrid(beta, alpha, indexing='ij')
+        self.lat, self.lon = theta - np.pi/2, phi
+        self.bwlat, self.bwlon = theta.shape
+        ct = np.cos(theta).flatten()
+        st = np.sin(theta).flatten()
+        cp = np.cos(phi).flatten()
+        sp = np.sin(phi).flatten()
+        x = st * cp
+        y = st * sp
+        z = ct
+        coords = np.vstack([x, y, z]).T
+        coords = np.asarray(coords, dtype=np.float32)
+        self.npix = len(coords)
+
+        # Neighbors and weight matrix calculation.
+        def south(x):
+            if x >= self.npix - self.bwlon:
+                return (x + self.bwlon//2) % self.bwlon + self.npix - self.bwlon
+            return x + self.bwlon
+
+        def north(x):
+            if x < self.bwlon:
+                return (x + self.bwlon//2) % self.bwlon
+            return x - self.bwlon
+
+        def west(x):
+            if x % self.bwlon < 1:
+                assert x // self.bwlon == (x - 1 + self.bwlon) // self.bwlon
+                x += self.bwlon
+            else:
+                assert x // self.bwlon == (x - 1) // self.bwlon
+            return x - 1
+
+        def east(x):
+            if x % self.bwlon >= self.bwlon - 1:
+                assert x // self.bwlon == (x + 1 - self.bwlon) // self.bwlon
+                x -= self.bwlon
+            else:
+                assert x // self.bwlon == (x + 1) // self.bwlon
+            return x + 1
+
+        col_index = []
+        for ind in range(self.npix):
+            # if neighbors==8:
+            #     neighbor = [south(west(ind)), west(ind), north(west(ind)), north(ind),
+            #                 north(east(ind)), east(ind), south(east(ind)), south(ind)]
+            # elif neighbors==4:
+            #     if self.sampling == 'DH' and x < self.lon:
+            #         neighbor = []
+            neighbor = [west(ind), north(ind), east(ind), south(ind)]
+            # else:
+            #     neighbor = []
+            col_index += neighbor
+        col_index = np.asarray(col_index)
+        row_index = np.repeat(np.arange(self.npix), 4)
+
+        keep = (col_index < self.npix)
+        keep &= (col_index >= 0)
+        col_index = col_index[keep]
+        row_index = row_index[keep]
+
+        if distance_type == 'geodesic':
+            hp = _import_hp()
+            distances = np.zeros(len(row_index))
+            for i, (pos1, pos2) in enumerate(zip(coords[row_index], coords[col_index])):
+                d1, d2 = hp.rotator.vec2dir(pos1.T, lonlat=False).T, hp.rotator.vec2dir(pos2.T, lonlat=False).T
+                distances[i] = hp.rotator.angdist(d1, d2, lonlat=False)
+        else:
+            # Note: this is the squared Euclidean distance.
+            distances = np.sum((coords[row_index] - coords[col_index])**2, axis=1)
+
+        # Compute similarities / edge weights.
+        # kernel_width = np.mean(distances)
+        # weights = np.exp(-distances / (2 * kernel_width))
+        weights = 1 / (distances + 1e-8)  # TODO: find a better representation for sampling 'Driscoll-Healy'
+
+        W = sparse.csr_matrix(
+            (weights, (row_index, col_index)), shape=(self.npix, self.npix), dtype=np.float32)
+
+        plotting = {"limits": np.array([-1, 1, -1, 1, -1, 1])}
+        super(SphereEquiangular, self).__init__(adjacency=W, coords=coords,
+                                                plotting=plotting, **kwargs)
+
+
+if __name__ == '__main__':
+    import matplotlib.pyplot as plt
+    from mpl_toolkits.mplot3d import Axes3D
+    G1 = SphereEquiangular(bandwidth=6, sampling='Driscoll-Healy')  # (384, 576)
+    G2 = SphereEquiangular(bandwidth=6, sampling='SOFT')
+    G3 = SphereEquiangular(bandwidth=6, sampling='Clenshaw-Curtis')
+    G4 = SphereEquiangular(bandwidth=6, sampling='Gauss-Legendre')
+    fig = plt.figure()
+    plt.subplots_adjust(wspace=1.)
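+    # Sketch of a visual comparison: one 3D subplot per sampling scheme,
+    # with every axis zoomed on the same octant of the sphere below.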
+ ax1 = fig.add_subplot(221, projection='3d') + ax2 = fig.add_subplot(222, projection='3d') + ax3 = fig.add_subplot(223, projection='3d') + ax4 = fig.add_subplot(224, projection='3d') + _ = G1.plot(ax=ax1, title='Driscoll-Healy', vertex_size=10) + _ = G2.plot(ax=ax2, title='SOFT', vertex_size=10) + _ = G3.plot(ax=ax3, title='Clenshaw-Curtis', vertex_size=10) + _ = G4.plot(ax=ax4, title='Gauss-Legendre', vertex_size=10) + ax1.set_xlim([0, 1]) + ax1.set_ylim([-1, 0.]) + ax1.set_zlim([0.5, 1.]) + ax2.set_xlim([0, 1]) + ax2.set_ylim([-1, 0.]) + ax2.set_zlim([0.5, 1.]) + ax3.set_xlim([0, 1]) + ax3.set_ylim([-1, 0.]) + ax3.set_zlim([0.5, 1.]) + ax4.set_xlim([0, 1]) + ax4.set_ylim([-1, 0.]) + ax4.set_zlim([0.5, 1.]) + plt.show() diff --git a/pygsp/graphs/stochasticblockmodel.py b/pygsp/graphs/stochasticblockmodel.py index 4f90e362..63918b72 100644 --- a/pygsp/graphs/stochasticblockmodel.py +++ b/pygsp/graphs/stochasticblockmodel.py @@ -41,8 +41,9 @@ class StochasticBlockModel(Graph): Allow self loops if True (default is False). connected : bool Force the graph to be connected (default is False). - n_try : int - Maximum number of trials to get a connected graph (default is 10). + n_try : int or None + Maximum number of trials to get a connected graph. If None, it will try + forever. seed : int Seed for the random number generator (for reproducible graphs). @@ -79,7 +80,7 @@ def __init__(self, N=1024, k=5, z=None, M=None, p=0.7, q=None, if M is None: self.p = p - p = np.asarray(p) + p = np.asanyarray(p) if p.size == 1: p = p * np.ones(k) if p.shape != (k,): @@ -89,7 +90,7 @@ def __init__(self, N=1024, k=5, z=None, M=None, p=0.7, q=None, if q is None: q = 0.3 / k self.q = q - q = np.asarray(q) + q = np.asanyarray(q) if q.size == 1: q = q * np.ones((k, k)) if q.shape != (k, k): @@ -108,7 +109,7 @@ def __init__(self, N=1024, k=5, z=None, M=None, p=0.7, q=None, # Along the lines of np.random.uniform(size=(N, N)) < p. # Or similar to sparse.random(N, N, p, data_rvs=lambda n: np.ones(n)). - for nb_iter in range(n_try): + while (n_try is None) or (n_try > 0): nb_row, nb_col = 0, 0 csr_data, csr_i, csr_j = [], [], [] @@ -132,19 +133,19 @@ def __init__(self, N=1024, k=5, z=None, M=None, p=0.7, q=None, if not connected: break - else: - self.W = W - if self.is_connected(recompute=True): - break - if nb_iter == n_try - 1: - raise ValueError('The graph could not be connected after {} ' - 'trials. Increase the connection probability ' - 'or the number of trials.'.format(n_try)) + if Graph(W).is_connected(): + break + if n_try is not None: + n_try -= 1 + if connected and n_try == 0: + raise ValueError('The graph could not be connected after {} ' + 'trials. 
Increase the connection probability ' + 'or the number of trials.'.format(self.n_try)) self.info = {'node_com': z, 'comm_sizes': np.bincount(z), 'world_rad': np.sqrt(N)} - super(StochasticBlockModel, self).__init__(W=W, **kwargs) + super(StochasticBlockModel, self).__init__(W, **kwargs) def _get_extra_repr(self): attrs = {'k': self.k} diff --git a/pygsp/graphs/swissroll.py b/pygsp/graphs/swissroll.py index a43d3138..7dc4404c 100644 --- a/pygsp/graphs/swissroll.py +++ b/pygsp/graphs/swissroll.py @@ -93,7 +93,7 @@ def __init__(self, N=400, a=1, b=4, dim=3, thresh=1e-6, s=None, 'distance': 7, } - super(SwissRoll, self).__init__(W=W, coords=coords.T, + super(SwissRoll, self).__init__(W, coords=coords.T, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/torus.py b/pygsp/graphs/torus.py index 20520180..1124b7a9 100644 --- a/pygsp/graphs/torus.py +++ b/pygsp/graphs/torus.py @@ -9,16 +9,20 @@ class Torus(Graph): r"""Sampled torus manifold. + On the torus, the graph Fourier transform (GFT) is the Kronecker product + between the GFT of two :class:`~pygsp.graphs.Ring` graphs. + Parameters ---------- Nv : int - Number of vertices along the first dimension (default is 16) + Number of vertices along the first dimension. Mv : int - Number of vertices along the second dimension (default is Nv) + Number of vertices along the second dimension. Default is ``Nv``. - References - ---------- - See :cite:`strang1999discrete` for more informations. + See Also + -------- + Ring : 1D line with periodic boundary conditions + Grid2d : Kronecker product of two path graphs Examples -------- @@ -92,7 +96,7 @@ def __init__(self, Nv=16, Mv=None, **kwargs): 'limits': np.array([-2.5, 2.5, -2.5, 2.5, -2.5, 2.5]) } - super(Torus, self).__init__(W=W, coords=coords, + super(Torus, self).__init__(W, coords=coords, plotting=plotting, **kwargs) def _get_extra_repr(self): diff --git a/pygsp/plotting.py b/pygsp/plotting.py index 56fcc6bf..289adcea 100644 --- a/pygsp/plotting.py +++ b/pygsp/plotting.py @@ -15,7 +15,7 @@ .. data:: BACKEND - Indicates which drawing backend to use if none are provided to the plotting + The default drawing backend to use if none are provided to the plotting functions. Should be either ``'matplotlib'`` or ``'pyqtgraph'``. In general pyqtgraph is better for interactive exploration while matplotlib is better at generating figures to be included in papers or elsewhere. @@ -128,7 +128,7 @@ def close_all(): def show(*args, **kwargs): - r"""Show created figures, alias to plt.show(). + r"""Show created figures, alias to ``plt.show()``. By default, showing plots does not block the prompt. Calling this function will block execution. @@ -138,7 +138,7 @@ def show(*args, **kwargs): def close(*args, **kwargs): - r"""Close last created figure, alias to plt.close().""" + r"""Close last created figure, alias to ``plt.close()``.""" _, plt, _ = _import_plt() plt.close(*args, **kwargs) @@ -237,7 +237,7 @@ def _plot_filter(filters, n, eigenvalues, sum, title, ax, **kwargs): """ if eigenvalues is None: - eigenvalues = hasattr(filters.G, '_e') + eigenvalues = (filters.G._e is not None) if sum is None: sum = filters.n_filters > 1 @@ -295,13 +295,13 @@ def _plot_graph(G, vertex_color, vertex_size, highlight, Parameters ---------- - vertex_color : array-like or color + vertex_color : array_like or color Signal to plot as vertex color (length is the number of vertices). If None, vertex color is set to `graph.plotting['vertex_color']`. Alternatively, a color can be set in any format accepted by matplotlib. 
         Each vertex color can be specified by an RGB(A) array of dimension
         `n_vertices` x 3 (or 4).
-    vertex_size : array-like or int
+    vertex_size : array_like or int
         Signal to plot as vertex size (length is the number of vertices).
         Vertex size ranges from 0.5 to 2 times `graph.plotting['vertex_size']`.
         If None, vertex size is set to `graph.plotting['vertex_size']`.
@@ -315,7 +315,7 @@
         Whether to draw edges in addition to vertices.
         Default to True if less than 10,000 edges to draw.
         Note that drawing many edges can be slow.
-    edge_color : array-like or color
+    edge_color : array_like or color
         Signal to plot as edge color (length is the number of edges).
         Edge color is given by `graph.plotting['edge_color']` and transparency
         ranges from 0.2 to 0.9.
@@ -324,7 +324,7 @@
         Each edge color can be specified by an RGB(A) array of dimension
         `n_edges` x 3 (or 4).
         Only available with the matplotlib backend.
-    edge_width : array-like or int
+    edge_width : array_like or int
         Signal to plot as edge width (length is the number of edges).
         Edge width ranges from 0.5 to 2 times `graph.plotting['edge_width']`.
         If None, edge width is set to `graph.plotting['edge_width']`.
@@ -403,11 +403,17 @@ def check_shape(signal, name, length, many=False):
             raise ValueError(txt)
 
     def normalize(x):
-        """Scale values in [0.25, 1]. Return 0.5 if constant."""
+        """Scale values in [intercept, 1]. Return 0.5 if constant.
+
+        The intercept is read from G.plotting['normalize_intercept'],
+        a value in [0, 1] (default is 0.25).
+        """
         ptp = x.ptp()
         if ptp == 0:
             return np.full(x.shape, 0.5)
-        return 0.75 * (x - x.min()) / ptp + 0.25
+        else:
+            intercept = G.plotting['normalize_intercept']
+            return (1. - intercept) * (x - x.min()) / ptp + intercept
 
     def is_color(color):
 
@@ -432,14 +438,14 @@
             limits = [0, 0]
             colorbar = False
         else:
-            vertex_color = np.asarray(vertex_color).squeeze()
+            vertex_color = np.asanyarray(vertex_color).squeeze()
             check_shape(vertex_color, 'Vertex color', G.n_vertices,
                         many=(G.coords.ndim == 1))
 
         if vertex_size is None:
             vertex_size = G.plotting['vertex_size']
         elif not np.isscalar(vertex_size):
-            vertex_size = np.asarray(vertex_size).squeeze()
+            vertex_size = np.asanyarray(vertex_size).squeeze()
             check_shape(vertex_size, 'Vertex size', G.n_vertices)
             vertex_size = G.plotting['vertex_size'] * 4 * normalize(vertex_size)**2
 
@@ -449,7 +455,7 @@
         if edge_color is None:
             edge_color = (G.plotting['edge_color'],)
         elif not is_color(edge_color):
-            edge_color = np.asarray(edge_color).squeeze()
+            edge_color = np.asanyarray(edge_color).squeeze()
             check_shape(edge_color, 'Edge color', G.n_edges)
             edge_color = 0.9 * normalize(edge_color)
             edge_color = [
@@ -526,7 +532,8 @@
             ax.plot(G.coords, vertex_color, alpha=0.5)
             ax.set_ylim(limits)
             for coord_hl in coords_hl:
-                ax.axvline(x=coord_hl, color='C1', linewidth=2)
+                ax.axvline(x=coord_hl, color=G.plotting['highlight_color'],
+                           linewidth=2)
 
         else:
             sc = ax.scatter(*G.coords.T,
@@ -539,7 +546,8 @@
             size_hl = vertex_size[highlight]
             ax.scatter(*coords_hl.T,
                        s=2*size_hl, zorder=3,
-                       marker='o', c='None', edgecolors='C1', linewidths=2)
+                       marker='o', c='None',
+                       edgecolors=G.plotting['highlight_color'], linewidths=2)
 
         if G.coords.shape[1] == 3:
             try:
diff --git a/pygsp/reduction.py b/pygsp/reduction.py
index 7e5bd73b..50a8ae67 100644
--- a/pygsp/reduction.py
+++ b/pygsp/reduction.py
@@ -122,7 +122,7 @@ def graph_sparsify(M, epsilon, maxiter=10):
         sparserW = sparserW + sparserW.T
         sparserL = sparse.diags(sparserW.diagonal(), 0) - sparserW
 
-        if graphs.Graph(W=sparserW).is_connected():
+        if graphs.Graph(sparserW).is_connected():
             break
         elif i == maxiter - 1:
             logger.warning('Despite attempts to reduce epsilon, sparsified graph is disconnected')
@@ -134,7 +134,7 @@
     if not M.is_directed():
         sparserW = (sparserW + sparserW.T) / 2.
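+        # Symmetrize so that the sparsified graph stays undirected.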
- Mnew = graphs.Graph(W=sparserW) + Mnew = graphs.Graph(sparserW) #M.copy_graph_attributes(Mnew) else: Mnew = sparse.lil_matrix(sparserL) @@ -257,7 +257,7 @@ def graph_multiresolution(G, levels, sparsify=True, sparsify_eps=None, for i in range(levels): if downsampling_method == 'largest_eigenvector': - if hasattr(Gs[i], '_U'): + if Gs[i]._U is not None: V = Gs[i].U[:, -1] else: V = linalg.eigs(Gs[i].L, 1)[1][:, 0] @@ -360,7 +360,7 @@ def kron_reduction(G, ind): Wnew = Wnew - Wnew.diagonal() coords = G.coords[ind, :] if len(G.coords.shape) else np.ndarray(None) - Gnew = graphs.Graph(W=Wnew, coords=coords, lap_type=G.lap_type, + Gnew = graphs.Graph(Wnew, coords=coords, lap_type=G.lap_type, plotting=G.plotting) else: Gnew = Lnew @@ -472,7 +472,7 @@ def pyramid_synthesis(Gs, cap, pe, order=30, **kwargs): """ least_squares = bool(kwargs.pop('least_squares', False)) - def_ul = Gs[0].N > 3000 or not hasattr(Gs[0], '_e') or not hasattr(Gs[0], '_U') + def_ul = Gs[0].N > 3000 or Gs[0]._e is None or Gs[0]._U is None use_landweber = bool(kwargs.pop('use_landweber', def_ul)) reg_eps = float(kwargs.get('reg_eps', 0.005)) diff --git a/pygsp/tests/test_all.py b/pygsp/tests/test_all.py index 2d52a185..1fc9a55d 100755 --- a/pygsp/tests/test_all.py +++ b/pygsp/tests/test_all.py @@ -14,9 +14,11 @@ from pygsp.tests import test_docstrings from pygsp.tests import test_plotting from pygsp.tests import test_learning +from pygsp.tests import test_nearest_neighbor suites = [] +suites.append(test_nearest_neighbor.suite) suites.append(test_graphs.suite) suites.append(test_filters.suite) suites.append(test_utils.suite) diff --git a/pygsp/tests/test_docstrings.py b/pygsp/tests/test_docstrings.py index b8837a64..d8f7ff45 100644 --- a/pygsp/tests/test_docstrings.py +++ b/pygsp/tests/test_docstrings.py @@ -32,11 +32,12 @@ def setup(doctest): 'np': numpy, } + # Docstrings from reference documentation. -suite_documentation = test_docstrings('pygsp', '.py', setup) +suite_reference = test_docstrings('pygsp', '.py', setup) # Docstrings from tutorials. # No setup to not forget imports. 
suite_tutorials = test_docstrings('.', '.rst') -suite = unittest.TestSuite([suite_documentation, suite_tutorials]) +suite = unittest.TestSuite([suite_reference, suite_tutorials]) diff --git a/pygsp/tests/test_filters.py b/pygsp/tests/test_filters.py index 0bf066f5..cef1689d 100644 --- a/pygsp/tests/test_filters.py +++ b/pygsp/tests/test_filters.py @@ -149,7 +149,7 @@ def test_frame(self): def get_frame(freq_response): return self._G.U.dot(np.diag(freq_response).dot(self._G.U.T)) gL = np.concatenate([get_frame(gl) for gl in g.evaluate(self._G.e)]) - np.testing.assert_allclose(gL1, gL) + np.testing.assert_allclose(gL1, gL, atol=1e-10) np.testing.assert_allclose(gL2, gL, atol=1e-10) def test_complement(self, frame_bound=2.5): diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 5a3d1b14..19c1220d 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -7,6 +7,9 @@ from __future__ import division +import os +import random +import sys import unittest import numpy as np @@ -35,29 +38,119 @@ def tearDownClass(cls): pass def test_graph(self): - W = np.arange(16).reshape(4, 4) - G = graphs.Graph(W) - np.testing.assert_allclose(G.W.A, W) - np.testing.assert_allclose(G.A.A, G.W.A > 0) - self.assertEqual(G.N, 4) - np.testing.assert_allclose(G.d, np.array([3, 4, 4, 4])) - self.assertEqual(G.Ne, 15) - self.assertTrue(G.is_directed()) - ki, kj = np.nonzero(G.A) - self.assertEqual(ki.shape[0], G.Ne) - self.assertEqual(kj.shape[0], G.Ne) + adjacency = [ + [0., 3., 0., 2.], + [3., 0., 4., 0.], + [0., 4., 0., 5.], + [2., 0., 5., 0.], + ] + + # Input types. + G = graphs.Graph(adjacency) + self.assertIs(type(G.W), sparse.csr_matrix) + adjacency = np.array(adjacency) + G = graphs.Graph(adjacency) + self.assertIs(type(G.W), sparse.csr_matrix) + adjacency = sparse.coo_matrix(adjacency) + G = graphs.Graph(adjacency) + self.assertIs(type(G.W), sparse.csr_matrix) + adjacency = sparse.csr_matrix(adjacency) + # G = graphs.Graph(adjacency) + # self.assertIs(G.W, adjacency) # Not copied if already CSR. + + # Attributes. + np.testing.assert_allclose(G.W.toarray(), adjacency.toarray()) + np.testing.assert_allclose(G.A.toarray(), G.W.toarray() > 0) + np.testing.assert_allclose(G.d, np.array([2, 2, 2, 2])) + np.testing.assert_allclose(G.dw, np.array([5, 7, 9, 7])) + self.assertEqual(G.n_vertices, 4) + self.assertIs(G.N, G.n_vertices) + self.assertEqual(G.n_edges, 4) + self.assertIs(G.Ne, G.n_edges) + + # Errors and warnings. + self.assertRaises(ValueError, graphs.Graph, np.ones((3, 4))) + self.assertRaises(ValueError, graphs.Graph, np.ones((3, 3, 4))) + self.assertRaises(ValueError, graphs.Graph, [[0, np.nan], [0, 0]]) + self.assertRaises(ValueError, graphs.Graph, [[0, np.inf], [0, 0]]) + if sys.version_info > (3, 4): # no assertLogs in python 2.7 + with self.assertLogs(level='WARNING'): + graphs.Graph([[0, -1], [-1, 0]]) + with self.assertLogs(level='WARNING'): + graphs.Graph([[1, 1], [1, 0]]) + for attr in ['A', 'd', 'dw', 'lmax', 'U', 'e', 'coherence', 'D']: + # FIXME: The Laplacian L should be there as well. 
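+            # They are read-only properties computed from the adjacency
+            # matrix, so setting or deleting them must raise AttributeError.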
+ self.assertRaises(AttributeError, setattr, G, attr, None) + self.assertRaises(AttributeError, delattr, G, attr) def test_degree(self): - W = 0.3 * (np.ones((4, 4)) - np.diag(4 * [1])) - G = graphs.Graph(W) - A = np.ones(W.shape) - np.diag(np.ones(4)) - np.testing.assert_allclose(G.A.toarray(), A) - np.testing.assert_allclose(G.d, 3 * np.ones([4])) - np.testing.assert_allclose(G.dw, 3 * 0.3) + graph = graphs.Graph([ + [0, 1, 0], + [1, 0, 2], + [0, 2, 0], + ]) + self.assertEqual(graph.is_directed(), False) + np.testing.assert_allclose(graph.d, [1, 2, 1]) + np.testing.assert_allclose(graph.dw, [1, 3, 2]) + graph = graphs.Graph([ + [0, 1, 0], + [0, 0, 2], + [0, 2, 0], + ]) + self.assertEqual(graph.is_directed(), True) + np.testing.assert_allclose(graph.d, [0.5, 1.5, 1]) + np.testing.assert_allclose(graph.dw, [0.5, 2.5, 2]) + + def test_is_connected(self): + graph = graphs.Graph([ + [0, 1, 0], + [1, 0, 2], + [0, 2, 0], + ]) + self.assertEqual(graph.is_directed(), False) + self.assertEqual(graph.is_connected(), True) + graph = graphs.Graph([ + [0, 1, 0], + [1, 0, 0], + [0, 2, 0], + ]) + self.assertEqual(graph.is_directed(), True) + self.assertEqual(graph.is_connected(), False) + graph = graphs.Graph([ + [0, 1, 0], + [1, 0, 0], + [0, 0, 0], + ]) + self.assertEqual(graph.is_directed(), False) + self.assertEqual(graph.is_connected(), False) + graph = graphs.Graph([ + [0, 1, 0], + [0, 0, 2], + [3, 0, 0], + ]) + self.assertEqual(graph.is_directed(), True) + self.assertEqual(graph.is_connected(), True) + + def test_is_directed(self): + graph = graphs.Graph([ + [0, 3, 0, 0], + [3, 0, 4, 0], + [0, 4, 0, 2], + [0, 0, 2, 0], + ]) + assert graph.W.nnz == 6 + self.assertEqual(graph.is_directed(), False) + # In-place modification is not allowed anymore. + # graph.W[0, 1] = 0 + # assert graph.W.nnz == 6 + # self.assertEqual(graph.is_directed(recompute=True), True) + # graph.W[1, 0] = 0 + # assert graph.W.nnz == 6 + # self.assertEqual(graph.is_directed(recompute=True), False) def test_laplacian(self): - adjacency = np.array([ + G = graphs.Graph([ [0, 3, 0, 1], [3, 0, 1, 0], [0, 1, 0, 3], @@ -69,20 +162,18 @@ def test_laplacian(self): [+0, -1, +4, -3], [-1, +0, -3, +4], ]) - G = graphs.Graph(adjacency) self.assertFalse(G.is_directed()) G.compute_laplacian('combinatorial') np.testing.assert_allclose(G.L.toarray(), laplacian) G.compute_laplacian('normalized') np.testing.assert_allclose(G.L.toarray(), laplacian/4) - adjacency = np.array([ + G = graphs.Graph([ [0, 6, 0, 1], [0, 0, 0, 0], [0, 2, 0, 3], [1, 0, 3, 0], ]) - G = graphs.Graph(adjacency) self.assertTrue(G.is_directed()) G.compute_laplacian('combinatorial') np.testing.assert_allclose(G.L.toarray(), laplacian) @@ -119,9 +210,9 @@ def test_estimate_lmax(self): self.assertRaises(ValueError, graph.estimate_lmax, method='unk') def check_lmax(graph, lmax): - graph.estimate_lmax(method='bounds', recompute=True) + graph.estimate_lmax(method='bounds') np.testing.assert_allclose(graph.lmax, lmax) - graph.estimate_lmax(method='lanczos', recompute=True) + graph.estimate_lmax(method='lanczos') np.testing.assert_allclose(graph.lmax, lmax*1.01) graph.compute_fourier_basis() np.testing.assert_allclose(graph.lmax, lmax) @@ -133,22 +224,22 @@ def check_lmax(graph, lmax): check_lmax(graph, lmax=value*n_nodes) # Regular bipartite graph (bound is tight). 
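+        # The combinatorial Laplacian of a d-regular bipartite graph attains
+        # the upper bound lmax = 2 * d (here d = 2, so lmax = 4).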
- adjacency = np.array([ + adjacency = [ [0, 0, 1, 1], [0, 0, 1, 1], [1, 1, 0, 0], [1, 1, 0, 0], - ]) + ] graph = graphs.Graph(adjacency, lap_type='combinatorial') check_lmax(graph, lmax=4) # Bipartite graph (bound is tight). - adjacency = np.array([ + adjacency = [ [0, 0, 1, 1], [0, 0, 1, 0], [1, 1, 0, 0], [1, 0, 0, 0], - ]) + ] graph = graphs.Graph(adjacency, lap_type='normalized') check_lmax(graph, lmax=2) @@ -246,8 +337,8 @@ def test_incidence_nx(graph): np.testing.assert_equal(incidence_pg, incidence_nx.toarray()) for graph in [graphs.Graph(np.zeros((n_vertices, n_vertices))), graphs.Graph(np.identity(n_vertices)), - graphs.Graph(np.array([[0, 0.8], [0.8, 0]])), - graphs.Graph(np.array([[1.3, 0], [0.4, 0.5]])), + graphs.Graph([[0, 0.8], [0.8, 0]]), + graphs.Graph([[1.3, 0], [0.4, 0.5]]), graphs.ErdosRenyi(n_vertices, directed=False, seed=42), graphs.ErdosRenyi(n_vertices, directed=True, seed=42)]: for lap_type in ['combinatorial', 'normalized']: @@ -331,6 +422,13 @@ def test(adjacency): test(sparse.csc_matrix(W)) test(sparse.coo_matrix(W)) + def test_set_signal(self, name='test'): + signal = np.zeros(self._G.n_vertices) + self._G.set_signal(signal, name) + self.assertIs(self._G.signals[name], signal) + signal = np.zeros(self._G.n_vertices // 2) + self.assertRaises(ValueError, self._G.set_signal, signal, name) + def test_set_coordinates(self): G = graphs.FullConnected() coords = self._rs.uniform(size=(G.N, 2)) @@ -348,6 +446,16 @@ def test_set_coordinates(self): G.set_coordinates('community2D') self.assertRaises(ValueError, G.set_coordinates, 'invalid') + + def test_subgraph(self, n_vertices=100): + self._G.set_signal(self._G.coords, 'coords') + graph = self._G.subgraph(range(n_vertices)) + self.assertEqual(graph.n_vertices, n_vertices) + self.assertEqual(graph.coords.shape, (n_vertices, 2)) + self.assertEqual(graph.signals['coords'].shape, (n_vertices, 2)) + self.assertIs(graph.lap_type, self._G.lap_type) + self.assertEqual(graph.plotting, self._G.plotting) + def test_nngraph(self, n_vertices=30): rs = np.random.RandomState(42) Xin = rs.normal(size=(n_vertices, 3)) @@ -398,6 +506,8 @@ def test_randomregular(self): def test_ring(self): graphs.Ring() graphs.Ring(N=32, k=16) + self.assertRaises(ValueError, graphs.Ring, 2) + self.assertRaises(ValueError, graphs.Ring, 5, k=3) def test_community(self): graphs.Community() @@ -421,10 +531,10 @@ def test_stochasticblockmodel(self): graphs.StochasticBlockModel(N=100, directed=False) graphs.StochasticBlockModel(N=100, self_loops=True) graphs.StochasticBlockModel(N=100, self_loops=False) - graphs.StochasticBlockModel(N=100, connected=True, n_try=100) + graphs.StochasticBlockModel(N=100, connected=True, seed=42) graphs.StochasticBlockModel(N=100, connected=False) self.assertRaises(ValueError, graphs.StochasticBlockModel, - N=100, p=0, q=0, connected=True, n_try=100) + N=100, p=0, q=0, connected=True) def test_airfoil(self): graphs.Airfoil() @@ -437,8 +547,8 @@ def test_davidsensornet(self): def test_erdosreny(self): graphs.ErdosRenyi(N=100, connected=False, directed=False) graphs.ErdosRenyi(N=100, connected=False, directed=True) - graphs.ErdosRenyi(N=100, connected=True, n_try=100, directed=False) - graphs.ErdosRenyi(N=100, connected=True, n_try=100, directed=True) + graphs.ErdosRenyi(N=100, connected=True, directed=False, seed=42) + graphs.ErdosRenyi(N=100, connected=True, directed=True, seed=42) G = graphs.ErdosRenyi(N=100, p=1, self_loops=True) self.assertEqual(G.W.nnz, 100**2) @@ -453,6 +563,12 @@ def test_path(self): def 
test_randomring(self): graphs.RandomRing() + G = graphs.RandomRing(angles=[0, 2, 1]) + self.assertEqual(G.N, 3) + self.assertRaises(ValueError, graphs.RandomRing, 2) + self.assertRaises(ValueError, graphs.RandomRing, angles=[0, 2]) + self.assertRaises(ValueError, graphs.RandomRing, angles=[0, 2, 7]) + self.assertRaises(ValueError, graphs.RandomRing, angles=[0, 2, -1]) def test_swissroll(self): graphs.SwissRoll(srtype='uniform') @@ -472,5 +588,276 @@ def test_imgpatches(self): def test_grid2dimgpatches(self): graphs.Grid2dImgPatches(img=self._img, patch_shape=(3, 3)) - -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + def test_grid2d_diagonals(self): + value = 0.5 + G = graphs.Grid2d(6, 7, diagonal=value) + self.assertEqual(G.W[2, 8], value) + self.assertEqual(G.W[9, 1], value) + self.assertEqual(G.W[9, 3], value) + self.assertEqual(G.W[2, 14], 0.0) + self.assertEqual(G.W[17, 1], 0.0) + self.assertEqual(G.W[9, 16], 1.0) + self.assertEqual(G.W[20, 27], 1.0) + +suite_graphs = unittest.TestLoader().loadTestsFromTestCase(TestCase) + + +class TestImportExport(unittest.TestCase): + + def test_networkx_export_import(self): + # Export to networkx and reimport to PyGSP + + # Exporting the Bunny graph + g = graphs.Bunny() + g_nx = g.to_networkx() + g2 = graphs.Graph.from_networkx(g_nx) + np.testing.assert_array_equal(g.W.todense(), g2.W.todense()) + + def test_networkx_import_export(self): + # Import from networkx then export to networkx again + g_nx = nx.gnm_random_graph(100, 50) # Generate a random graph + g = graphs.Graph.from_networkx(g_nx).to_networkx() + + np.testing.assert_array_equal(nx.adjacency_matrix(g_nx).todense(), + nx.adjacency_matrix(g).todense()) + + @unittest.skipIf(sys.version_info < (3,), 'old graph-tool') + def test_graphtool_export_import(self): + # Export to graph tool and reimport to PyGSP directly + # The exported graph is a simple one without an associated Signal + g = graphs.Bunny() + g_gt = g.to_graphtool() + g2 = graphs.Graph.from_graphtool(g_gt) + np.testing.assert_array_equal(g.W.todense(), g2.W.todense()) + + @unittest.skipIf(sys.version_info < (3,), 'old graph-tool') + def test_graphtool_multiedge_import(self): + # Manualy create a graph with multiple edges + g_gt = gt.Graph() + g_gt.add_vertex(n=10) + # connect edge (3,6) three times + for i in range(3): + g_gt.add_edge(g_gt.vertex(3), g_gt.vertex(6)) + g = graphs.Graph.from_graphtool(g_gt) + self.assertEqual(g.W[3, 6], 3.0) + + eprop_double = g_gt.new_edge_property("double") + + # Set the weight of 2 out of the 3 edges. 
The last one has a default weight of 0 + e = g_gt.edge(3, 6, all_edges=True) + eprop_double[e[0]] = 8.0 + eprop_double[e[1]] = 1.0 + + g_gt.edge_properties["weight"] = eprop_double + g3 = graphs.Graph.from_graphtool(g_gt) + self.assertEqual(g3.W[3, 6], 9.0) + + @unittest.skipIf(sys.version_info < (3,), 'old graph-tool') + def test_graphtool_import_export(self): + # Import to PyGSP and export again to graph tool directly + # create a random graphTool graph that does not contain multiple edges and no signal + graph_gt = gt.generation.random_graph(100, lambda : (np.random.poisson(4), np.random.poisson(4))) + + eprop_double = graph_gt.new_edge_property("double") + for e in graph_gt.edges(): + eprop_double[e] = random.random() + graph_gt.edge_properties["weight"] = eprop_double + + graph2_gt = graphs.Graph.from_graphtool(graph_gt).to_graphtool() + + self.assertEqual(graph_gt.num_edges(), graph2_gt.num_edges(), + "the number of edges does not correspond") + + def key(edge): return str(edge.source()) + ":" + str(edge.target()) + + for e1, e2 in zip(sorted(graph_gt.edges(), key=key), sorted(graph2_gt.edges(), key=key)): + self.assertEqual(e1.source(), e2.source()) + self.assertEqual(e1.target(), e2.target()) + for v1, v2 in zip(graph_gt.vertices(), graph2_gt.vertices()): + self.assertEqual(v1, v2) + + def test_networkx_signal_export(self): + graph = graphs.BarabasiAlbert(N=100, seed=42) + rs = np.random.RandomState(42) + signal1 = rs.normal(size=graph.N) + signal2 = rs.normal(size=graph.N) + graph.set_signal(signal1, "signal1") + graph.set_signal(signal2, "signal2") + graph_nx = graph.to_networkx() + for i in range(graph.N): + self.assertEqual(graph_nx.node[i]["signal1"], signal1[i]) + self.assertEqual(graph_nx.node[i]["signal2"], signal2[i]) + # invalid signal type + graph = graphs.Path(3) + graph.set_signal(np.array(['a', 'b', 'c']), 'sig') + self.assertRaises(ValueError, graph.to_networkx) + + def test_graphtool_signal_export(self): + g = graphs.Logo() + rs = np.random.RandomState(42) + s = rs.normal(size=g.N) + s2 = rs.normal(size=g.N) + g.set_signal(s, "signal1") + g.set_signal(s2, "signal2") + g_gt = g.to_graphtool() + # Check the signals on all nodes + for i, v in enumerate(g_gt.vertices()): + self.assertEqual(g_gt.vertex_properties["signal1"][v], s[i]) + self.assertEqual(g_gt.vertex_properties["signal2"][v], s2[i]) + # invalid signal type + graph = graphs.Path(3) + graph.set_signal(np.array(['a', 'b', 'c']), 'sig') + self.assertRaises(TypeError, graph.to_graphtool) + + @unittest.skipIf(sys.version_info < (3,), 'old graph-tool') + def test_graphtool_signal_import(self): + g_gt = gt.Graph() + g_gt.add_vertex(10) + + g_gt.add_edge(g_gt.vertex(3), g_gt.vertex(6)) + g_gt.add_edge(g_gt.vertex(4), g_gt.vertex(6)) + g_gt.add_edge(g_gt.vertex(7), g_gt.vertex(2)) + + vprop_double = g_gt.new_vertex_property("double") + + vprop_double[g_gt.vertex(0)] = 5 + vprop_double[g_gt.vertex(1)] = -3 + vprop_double[g_gt.vertex(2)] = 2.4 + + g_gt.vertex_properties["signal"] = vprop_double + g = graphs.Graph.from_graphtool(g_gt) + self.assertEqual(g.signals["signal"][0], 5.0) + self.assertEqual(g.signals["signal"][1], -3.0) + self.assertEqual(g.signals["signal"][2], 2.4) + + def test_networkx_signal_import(self): + graph_nx = nx.Graph() + graph_nx.add_nodes_from(range(2, 5)) + graph_nx.add_edges_from([(3, 4), (2, 4), (3, 5)]) + nx.set_node_attributes(graph_nx, {2: 4, 3: 5, 5: 2.3}, "s") + graph_pg = graphs.Graph.from_networkx(graph_nx) + np.testing.assert_allclose(graph_pg.signals["s"], [4, 5, np.nan, 2.3]) + + 
def test_no_weights(self): + + adjacency = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) + + # NetworkX no weights. + graph_nx = nx.Graph() + graph_nx.add_edge(0, 1) + graph_nx.add_edge(1, 2) + graph_pg = graphs.Graph.from_networkx(graph_nx) + np.testing.assert_allclose(graph_pg.W.toarray(), adjacency) + + # NetworkX non-existent weight name. + graph_nx.edges[(0, 1)]['weight'] = 2 + graph_nx.edges[(1, 2)]['weight'] = 2 + graph_pg = graphs.Graph.from_networkx(graph_nx) + np.testing.assert_allclose(graph_pg.W.toarray(), 2*adjacency) + graph_pg = graphs.Graph.from_networkx(graph_nx, weight='unknown') + np.testing.assert_allclose(graph_pg.W.toarray(), adjacency) + + # Graph-tool no weights. + graph_gt = gt.Graph(directed=False) + graph_gt.add_edge(0, 1) + graph_gt.add_edge(1, 2) + graph_pg = graphs.Graph.from_graphtool(graph_gt) + np.testing.assert_allclose(graph_pg.W.toarray(), adjacency) + + # Graph-tool non-existent weight name. + prop = graph_gt.new_edge_property("double") + prop[(0, 1)] = 2 + prop[(1, 2)] = 2 + graph_gt.edge_properties["weight"] = prop + graph_pg = graphs.Graph.from_graphtool(graph_gt) + np.testing.assert_allclose(graph_pg.W.toarray(), 2*adjacency) + graph_pg = graphs.Graph.from_graphtool(graph_gt, weight='unknown') + np.testing.assert_allclose(graph_pg.W.toarray(), adjacency) + + def test_break_join_signals(self): + """Multi-dim signals are broken on export and joined on import.""" + graph1 = graphs.Sensor(20, seed=42) + graph1.set_signal(graph1.coords, 'coords') + # networkx + graph2 = graph1.to_networkx() + graph2 = graphs.Graph.from_networkx(graph2) + np.testing.assert_allclose(graph2.signals['coords'], graph1.coords) + # graph-tool + graph2 = graph1.to_graphtool() + graph2 = graphs.Graph.from_graphtool(graph2) + np.testing.assert_allclose(graph2.signals['coords'], graph1.coords) + # save and load (need ordered dicts) + if sys.version_info >= (3, 6): + filename = 'graph.graphml' + graph1.save(filename) + graph2 = graphs.Graph.load(filename) + np.testing.assert_allclose(graph2.signals['coords'], graph1.coords) + os.remove(filename) + + @unittest.skipIf(sys.version_info < (3, 6), 'need ordered dicts') + def test_save_load(self): + + # TODO: test with multiple graphs and signals + # * dtypes (float, int, bool) of adjacency and signals + # * empty graph / isolated nodes + + G1 = graphs.Sensor(seed=42) + W = G1.W.toarray() + sig = np.random.RandomState(42).normal(size=G1.N) + G1.set_signal(sig, 's') + + for fmt in ['graphml', 'gml', 'gexf']: + for backend in ['networkx', 'graph-tool']: + + if fmt == 'gexf' and backend == 'graph-tool': + self.assertRaises(ValueError, G1.save, 'g', fmt, backend) + self.assertRaises(ValueError, graphs.Graph.load, 'g', fmt, + backend) + os.remove('g') + continue + + atol = 1e-5 if fmt == 'gml' and backend == 'graph-tool' else 0 + + for filename, fmt in [('graph.' 
+ fmt, None), ('graph', fmt)]: + G1.save(filename, fmt, backend) + G2 = graphs.Graph.load(filename, fmt, backend) + np.testing.assert_allclose(G2.W.toarray(), W, atol=atol) + np.testing.assert_allclose(G2.signals['s'], sig, atol=atol) + os.remove(filename) + + self.assertRaises(ValueError, graphs.Graph.load, 'g.gml', fmt='?') + self.assertRaises(ValueError, graphs.Graph.load, 'g.gml', backend='?') + self.assertRaises(ValueError, G1.save, 'g.gml', fmt='?') + self.assertRaises(ValueError, G1.save, 'g.gml', backend='?') + + @unittest.skipIf(sys.version_info < (3, 3), 'need unittest.mock') + def test_import_errors(self): + from unittest.mock import patch + graph = graphs.Sensor() + filename = 'graph.gml' + with patch.dict(sys.modules, {'networkx': None}): + self.assertRaises(ImportError, graph.to_networkx) + self.assertRaises(ImportError, graphs.Graph.from_networkx, None) + self.assertRaises(ImportError, graph.save, filename, + backend='networkx') + self.assertRaises(ImportError, graphs.Graph.load, filename, + backend='networkx') + graph.save(filename) + graphs.Graph.load(filename) + with patch.dict(sys.modules, {'graph_tool': None}): + self.assertRaises(ImportError, graph.to_graphtool) + self.assertRaises(ImportError, graphs.Graph.from_graphtool, None) + self.assertRaises(ImportError, graph.save, filename, + backend='graph-tool') + self.assertRaises(ImportError, graphs.Graph.load, filename, + backend='graph-tool') + graph.save(filename) + graphs.Graph.load(filename) + with patch.dict(sys.modules, {'networkx': None, 'graph_tool': None}): + self.assertRaises(ImportError, graph.save, filename) + self.assertRaises(ImportError, graphs.Graph.load, filename) + os.remove(filename) + + +suite_import_export = unittest.TestLoader().loadTestsFromTestCase(TestImportExport) +suite = unittest.TestSuite([suite_graphs, suite_import_export]) diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py new file mode 100644 index 00000000..5d734153 --- /dev/null +++ b/pygsp/tests/test_nearest_neighbor.py @@ -0,0 +1,70 @@ +import unittest +import numpy as np +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix + +class TestCase(unittest.TestCase): + def test_nngraph(self, n_vertices=24): + data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + + for metric in metrics: + for kind in ['knn', 'radius']: + for backend in backends: + params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=4) + ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params) + # Unsupported combinations. 
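+                    # The following backend/metric/mode combinations are
+                    # unsupported and must raise a ValueError.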
+                    if backend == 'flann' and metric == 'max_dist':
+                        self.assertRaises(ValueError, nearest_neighbor, data,
+                                          metric=metric, backend=backend)
+                    elif backend == 'nmslib' and metric == 'minkowski':
+                        self.assertRaises(ValueError, nearest_neighbor, data,
+                                          metric=metric, backend=backend)
+                    elif backend == 'nmslib' and kind == 'radius':
+                        self.assertRaises(ValueError, nearest_neighbor, data,
+                                          kind=kind, backend=backend)
+                    else:
+                        params['backend'] = backend
+                        if backend == 'flann':
+                            other_nn, other_d = nearest_neighbor(random_seed=44, **params)
+                        else:
+                            other_nn, other_d = nearest_neighbor(**params)
+                        print(kind, backend)
+                        for a, b in zip(ref_nn, other_nn):
+                            np.testing.assert_allclose(np.sort(a), np.sort(b), rtol=1e-5)
+
+                        for a, b in zip(ref_d, other_d):
+                            np.testing.assert_allclose(np.sort(a), np.sort(b), rtol=1e-5)
+
+    def test_sparse_distance_matrix(self):
+        data = np.random.RandomState(42).uniform(size=(200, 3))
+        neighbors, distances = nearest_neighbor(data)
+        W = sparse_distance_matrix(neighbors, distances, symmetrize=True)
+        # Assert symmetry.
+        np.testing.assert_allclose(W.todense(), W.T.todense())
+        # Assert positivity.
+        np.testing.assert_array_equal(W.todense() >= 0, True)
+        # Assert zero diagonal.
+        np.testing.assert_array_equal(np.diag(W.todense()) == 0, True)
+
+        # Assert that it is not symmetric anymore.
+        W = sparse_distance_matrix(neighbors, distances, symmetrize=False)
+        assert np.sum(np.abs(W.todense() - W.T.todense())) > 0.1
+        # Assert positivity.
+        np.testing.assert_array_equal(W.todense() >= 0, True)
+        # Assert zero diagonal.
+        np.testing.assert_array_equal(np.diag(W.todense()) == 0, True)
+        # Assert that every distance is used once.
+        np.testing.assert_allclose(np.sum(W.todense()), np.sum(distances))
+
+        # A simple test with a kernel.
+        W = sparse_distance_matrix(neighbors, 1 / (1 + distances), symmetrize=True)
+        # Assert symmetry.
+        np.testing.assert_allclose(W.todense(), W.T.todense())
+        # Assert positivity.
+        np.testing.assert_array_equal(W.todense() >= 0, True)
+        # Assert zero diagonal.
+        np.testing.assert_array_equal(np.diag(W.todense()) == 0, True)
+
+
+suite = unittest.TestLoader().loadTestsFromTestCase(TestCase)
diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py
index 0aa29d44..2d07d028 100644
--- a/pygsp/tests/test_plotting.py
+++ b/pygsp/tests/test_plotting.py
@@ -50,8 +50,8 @@ def test_plot_graphs(self):
 
         # Classes that require parameters.
             if classname == 'NNGraph':
-                Xin = np.arange(90).reshape(30, 3)
-                Gs.append(Graph(Xin))
+                features = np.random.RandomState(42).normal(size=(30, 3))
+                Gs.append(Graph(features))
             elif classname in ['ImgPatches', 'Grid2dImgPatches']:
                 Gs.append(Graph(img=self._img, patch_shape=(3, 3)))
             else:
@@ -71,7 +71,6 @@ def test_plot_graphs(self):

         for G in Gs:
             self.assertTrue(hasattr(G, 'coords'))
-            self.assertTrue(hasattr(G, 'A'))
             self.assertEqual(G.N, G.coords.shape[0])

             signal = np.arange(G.N) + 0.3
diff --git a/pygsp/tests/test_utils.py b/pygsp/tests/test_utils.py
index cbd824b1..af23f4b8 100644
--- a/pygsp/tests/test_utils.py
+++ b/pygsp/tests/test_utils.py
@@ -32,103 +32,5 @@ def test_symmetrize(self):
             np.testing.assert_equal(W1.toarray(), W2)
         self.assertRaises(ValueError, utils.symmetrize, W, 'sum')

-    def test_utils(self):
-        # Data init
-        W1 = np.arange(16).reshape((4, 4))
-        mask1 = np.array([[1, 0, 1, 0], [0, 1, 0, 1],
-                          [1, 0, 1, 0], [0, 1, 0, 1]])
-        W1[mask1 == 1] = 0
-        W1 = sparse.lil_matrix(W1)
-        G1 = graphs.Graph(W1)
-        lap1 = np.array([[10, -2.5, 0, -7.5],
-                         [-2.5, 10, -7.5, 0],
-                         [0, -7.5, 20, -12.5],
-                         [-7.5, 0, -12.5, 20]])
-
-        sym1 = np.matrix([[0, 2.5, 0, 7.5],
-                          [2.5, 0, 7.5, 0],
-                          [0, 7.5, 0, 12.5],
-                          [7.5, 0, 12.5, 0]])
-        sym1 = sparse.lil_matrix(sym1)
-        weight_check1 = {'has_inf_val': False, 'has_nan_value': False,
-                         'is_not_square': False, 'diag_is_not_zero': False}
-        rep1 = {'lap': lap1, 'is_dir': True, 'weight_check': weight_check1,
-                'is_conn': True, 'sym': sym1, 'lmax': 35.}
-        t1 = {'G': G1, 'rep': rep1}
-
-        W2 = np.zeros((4, 4))
-        W2[0, 1] = float('NaN')
-        W2[0, 2] = float('Inf')
-        G2 = graphs.Graph(W2)
-        weight_check2 = {'has_inf_val': True, 'has_nan_value': True,
-                         'is_not_square': True, 'diag_is_not_zero': False}
-        rep2 = {'lap': None, 'is_dir': True, 'weight_check': weight_check2,
-                'is_conn': False}
-        t2 = {'G': G2, 'rep': rep2}
-
-        W3 = np.zeros((4, 4))
-        G3 = graphs.Graph(W3)
-        lap3 = W3
-        sym3 = G3.W
-        weight_check3 = {'has_inf_val': False, 'has_nan_value': False,
-                         'is_not_square': False, 'diag_is_not_zero': False}
-        rep3 = {'lap': lap3, 'is_dir': False, 'weight_check': weight_check3,
-                'is_conn': False, 'sym': sym3, 'lmax': 0.}
-        t3 = {'G': G3, 'rep': rep3}
-
-        W4 = np.zeros((4, 4))
-        np.fill_diagonal(W4, 1)
-        G4 = graphs.Graph(W4)
-        lap4 = np.zeros((4, 4))
-        sym4 = sparse.csc_matrix(W4)
-        weight_check4 = {'has_inf_val': False, 'has_nan_value': False,
-                         'is_not_square': False, 'diag_is_not_zero': True}
-        rep4 = {'lap': lap4, 'is_dir': False, 'weight_check': weight_check4,
-                'is_conn': False, 'sym': sym4, 'lmax': 0.}
-        t4 = {'G': G4, 'rep': rep4}
-
-        test_graphs = [t1, t3, t4]
-
-        def test_is_directed(G, is_dir):
-            self.assertEqual(G.is_directed(), is_dir)
-
-        def test_laplacian(G, lap):
-            self.assertTrue((G.L == lap).all())
-
-        def test_estimate_lmax(G, lmax):
-            G.estimate_lmax()
-            self.assertTrue(lmax <= G.lmax and G.lmax <= 1.02 * lmax)
-
-        def test_check_weights(G, w_c):
-            self.assertEqual(G.check_weights(), w_c)
-
-        def test_is_connected(G, is_conn, **kwargs):
-            self.assertEqual(G.is_connected(), is_conn)
-
-        def test_distanz(x, y):
-            # TODO test with matlab to compare
-            self.assertEqual(utils.distanz(x, y))
-
-        # Not ready yet
-        # def test_tree_depths(A, root):
-        #     # mat_answser = None
-        #     self.assertEqual(mat_answser, utils.tree_depths(A, root))
-        for t in test_graphs:
-            test_is_directed(t['G'], t['rep']['is_dir'])
-            test_laplacian(t['G'], t['rep']['lap'])
-            test_estimate_lmax(t['G'], t['rep']['lmax'])
-            test_check_weights(t['G'], t['rep']['weight_check'])
-            test_is_connected(t['G'], t['rep']['is_conn'])
-
-        G5 = graphs.Graph(np.arange(16).reshape((4, 4)))
-        checks5 = {'has_inf_val': False, 'has_nan_value': False,
-                   'is_not_square': False, 'diag_is_not_zero': True}
-        test_check_weights(G5, checks5)
-
-        # Not ready yet
-        # test_tree_depths(A, root)
-
-        # test_distanz(x, y)
-

 suite = unittest.TestLoader().loadTestsFromTestCase(TestCase)
diff --git a/setup.py b/setup.py
index e1b6419f..746be6ce 100644
--- a/setup.py
+++ b/setup.py
@@ -31,15 +31,19 @@
         'scipy',
     ],
     extras_require={
-        # Optional dependencies for some functionalities.
-        'alldeps': (
+        # Optional dependencies for development. Some bring additional
+        # functionality; others are for testing, documentation, or packaging.
+        'dev': [
            # Import and export.
            'networkx',
+           # 'graph-tool' cannot be installed by pip.
            # Construct patch graphs from images.
            'scikit-image',
+           # Construct the Sphere graph.
+           'healpy',
            # Approximate nearest neighbors for kNN graphs.
-           'pyflann; python_version == "2.*"',
-           'pyflann3; python_version == "3.*"',
+           'cyflann',
+           'nmslib',
            # Convex optimization on graph.
            'pyunlocbox',
            # Plot graphs, signals, and filters.
@@ -51,22 +55,18 @@
            'PyQt5; python_version >= "3.5"',
            # No source package for PyQt5 on PyPI; fall back to PySide.
            'PySide; python_version < "3.5"',
-        ),
-        # Testing dependencies.
-        'test': [
+           # Run the tests.
            'flake8',
            'coverage',
            'coveralls',
-        ],
-        # Dependencies to build the documentation.
-        'doc': [
+           # Build the documentation.
            'sphinx',
            'numpydoc',
            'sphinxcontrib-bibtex',
+           'sphinx-gallery',
+           'memory_profiler',
            'sphinx-rtd-theme',
-        ],
-        # Dependencies to build and upload packages.
-        'pkg': [
+           # Build and upload packages.
            'wheel',
            'twine',
        ],