diff --git a/.cargo/config b/.cargo/config new file mode 100644 index 000000000000..d47f983e474f --- /dev/null +++ b/.cargo/config @@ -0,0 +1,11 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] diff --git a/.gitignore b/.gitignore index b395e6aeef10..74b9026af629 100644 --- a/.gitignore +++ b/.gitignore @@ -151,3 +151,8 @@ test/ipynb/mpl/circuit/result_test.json test/ipynb/mpl/graph/*.png test/ipynb/mpl/graph/*.zip test/ipynb/mpl/graph/result_test.json + +# Added by cargo + +/target +Cargo.lock diff --git a/.pylintrc b/.pylintrc index c45325296758..b2f744ba968f 100644 --- a/.pylintrc +++ b/.pylintrc @@ -33,7 +33,7 @@ unsafe-load-any-extension=no # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code -extension-pkg-allow-list=retworkx, numpy, tweedledum +extension-pkg-allow-list=retworkx, numpy, tweedledum, qiskit._accelerate [MESSAGES CONTROL] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 48486e66373d..22ebd6127080 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -225,9 +225,32 @@ build all the documentation into `docs/_build/html` and the release notes in particular will be located at `docs/_build/html/release_notes.html` ## Installing Qiskit Terra from source -Please see the [Installing Qiskit Terra from -Source](https://qiskit.org/documentation/contributing_to_qiskit.html#installing-terra-from-source) -section of the Qiskit documentation. + +Qiskit Terra is primarily written in Python but there are some core routines +that are written in the [Rust](https://www.rust-lang.org/) programming +language to improve the runtime performance. 
For the released versions of +qiskit-terra we publish precompiled binaries on the +[Python Package Index](https://pypi.org/) for all the supported platforms +which only require a functional Python environment to install. However, when +building and installing from source you will need a rust compiler installed. You can do this very easily +using rustup: https://rustup.rs/ which provides a single tool to install and +configure the latest version of the rust compiler. +[Other installation methods](https://forge.rust-lang.org/infra/other-installation-methods.html) +exist too. For Windows users, besides rustup you will also need to install +the Visual C++ build tools so that rust can link against the system c/c++ +libraries. You can see more details on this in the +[rustup documentation](https://rust-lang.github.io/rustup/installation/windows.html). + +Once you have a rust compiler installed you can rely on the normal Python +build/install steps to install Qiskit Terra. This means you just run +`pip install .` in your local git clone to build and install Qiskit Terra. + +Do note that if you do use develop mode/editable install (via `python setup.py develop` or `pip install -e .`) the Rust extension will be built in debug mode +without any optimizations enabled. This will result in poor runtime performance. +If you'd like to use an editable install with an optimized binary you can +run `python setup.py build_rust --release --inplace` after you install in +editable mode to recompile the rust extensions in release mode. 
+ ## Test diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000000..1c88d5bceb28 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "qiskit-terra" +version = "0.20.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "qiskit_accelerate" +crate-type = ["cdylib"] + +[dependencies] +rayon = "1.5" +numpy = "0.15.1" +rand = "0.8" +rand_pcg = "0.3" +rand_distr = "0.4.3" + +[dependencies.pyo3] +version = "0.15.1" +features = ["extension-module", "hashbrown"] + +[dependencies.ndarray] +version = "^0.15.0" +features = ["rayon"] + +[dependencies.hashbrown] +version = "0.11.2" +features = ["rayon"] + +[profile.release] +lto = 'fat' +codegen-units = 1 diff --git a/MANIFEST.in b/MANIFEST.in index 9eb57c925b42..13898b9b59e7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -15,3 +15,6 @@ include test/python/pickles/*.pickle include test/python/qasm/*.qasm include test/python/visualization/references/*.png include test/python/notebooks/*.ipynb + +include Cargo.toml +recursive-include src * diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 359bae6fc30f..c71f8b8e11ee 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -180,6 +180,7 @@ stages: pip install -U "cplex" "qiskit-aer" "z3-solver" -c constraints.txt mkdir -p /tmp/terra-tests cp -r test /tmp/terra-tests/. + cp tools/verify_parallel_map.py /tmp/terra-tests/. cp .stestr.conf /tmp/terra-tests/. cp -r .stestr /tmp/terra-tests/. 
|| : sudo apt-get update @@ -193,8 +194,11 @@ stages: export PYTHONHASHSEED=$(python -S -c "import random; print(random.randint(1, 4294967295))") echo "PYTHONHASHSEED=$PYTHONHASHSEED" stestr run + python ./verify_parallel_map.py popd displayName: 'Run tests' + env: + QISKIT_PARALLEL: FALSE - task: CopyFiles@2 condition: failed() displayName: 'Copy images' @@ -239,7 +243,6 @@ stages: virtualenv image_tests image_tests/bin/pip install -U -r requirements.txt -c constraints.txt image_tests/bin/pip install -U -c constraints.txt -e ".[visualization]" - image_tests/bin/python setup.py build_ext --inplace sudo apt-get update sudo apt-get install -y graphviz pandoc image_tests/bin/pip check @@ -286,6 +289,8 @@ stages: tools/verify_headers.py qiskit test python tools/find_optional_imports.py reno lint + cargo fmt --check + cargo clippy -- -D warnings displayName: 'Style and lint' - job: 'Docs' pool: {vmImage: 'ubuntu-latest'} @@ -314,7 +319,6 @@ stages: set -e python -m pip install --upgrade pip setuptools wheel pip install -U tox - python setup.py build_ext --inplace sudo apt-get update sudo apt-get install -y graphviz displayName: 'Install dependencies' @@ -384,7 +388,10 @@ stages: export PYTHONHASHSEED=$(python -S -c "import random; print(random.randint(1, 4294967295))") echo "PYTHONHASHSEED=$PYTHONHASHSEED" stestr run + python ./tools/verify_parallel_map.py displayName: 'Run tests' + env: + QISKIT_PARALLEL: FALSE - task: CopyFiles@2 condition: failed() displayName: 'Copy images' @@ -454,10 +461,12 @@ stages: export PYTHONHASHSEED=$(python -S -c "import random; print(random.randint(1, 1024))") echo "PYTHONHASHSEED=$PYTHONHASHSEED" stestr run + python ./tools/verify_parallel_map.py displayName: 'Run tests' env: LANG: 'C.UTF-8' PYTHONIOENCODING: 'utf-8:backslashreplace' + QISKIT_PARALLEL: FALSE - task: CopyFiles@2 condition: failed() displayName: 'Copy images' @@ -538,6 +547,7 @@ stages: export PYTHONHASHSEED=$(python -S -c "import random; print(random.randint(1, 1024))") 
echo "PYTHONHASHSEED=$PYTHONHASHSEED" stestr run + python ./tools/verify_parallel_map.py env: LANG: 'C.UTF-8' PYTHONIOENCODING: 'utf-8:backslashreplace' @@ -630,7 +640,10 @@ stages: export PYTHONHASHSEED=$(python -S -c "import random; print(random.randint(1, 4294967295))") echo "PYTHONHASHSEED=$PYTHONHASHSEED" stestr run + python ./tools/verify_parallel_map.py displayName: 'Run tests' + env: + QISKIT_PARALLEL: FALSE - task: CopyFiles@2 condition: failed() displayName: 'Copy images' @@ -712,6 +725,7 @@ stages: export PYTHONHASHSEED=$(python -S -c "import random; print(random.randint(1, 4294967295))") echo "PYTHONHASHSEED=$PYTHONHASHSEED" stestr run + python ./tools/verify_parallel_map.py displayName: 'Run tests' - task: CopyFiles@2 condition: failed() diff --git a/examples/python/stochastic_swap.py b/examples/python/stochastic_swap.py index c07cc0731a8b..7625cba3b73a 100644 --- a/examples/python/stochastic_swap.py +++ b/examples/python/stochastic_swap.py @@ -73,23 +73,22 @@ # Build the expected output to verify the pass worked expected = QuantumCircuit(qr, cr) expected.cx(qr[1], qr[2]) +expected.h(qr[2]) expected.swap(qr[0], qr[1]) +expected.h(qr[0]) expected.cx(qr[1], qr[3]) expected.h(qr[3]) -expected.h(qr[2]) expected.measure(qr[1], cr[0]) -expected.h(qr[0]) expected.swap(qr[1], qr[3]) -expected.h(qr[3]) expected.cx(qr[2], qr[1]) +expected.h(qr[3]) +expected.swap(qr[0], qr[1]) expected.measure(qr[2], cr[2]) -expected.swap(qr[1], qr[3]) -expected.measure(qr[3], cr[3]) -expected.cx(qr[1], qr[0]) -expected.measure(qr[1], cr[0]) -expected.measure(qr[0], cr[1]) +expected.cx(qr[3], qr[1]) +expected.measure(qr[0], cr[3]) +expected.measure(qr[3], cr[0]) +expected.measure(qr[1], cr[1]) expected_dag = circuit_to_dag(expected) - # Run the pass on the dag from the input circuit pass_ = StochasticSwap(coupling, 20, 999) after = pass_.run(dag) diff --git a/pyproject.toml b/pyproject.toml index 8e5a5fd0b539..b2787bc5edfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 
+1,6 @@ [build-system] -requires = ["Cython>=0.27.1", "setuptools", "wheel"] +requires = ["Cython>=0.27.1", "setuptools", "wheel", "setuptools-rust"] +build-backend = "setuptools.build_meta" [tool.black] line-length = 100 @@ -16,3 +17,7 @@ test-command = "python {project}/examples/python/stochastic_swap.py" # Numpy 1.22 there are no i686 wheels, so we force pip to use older ones without # restricting any dependencies that Numpy and Scipy might have. before-test = "pip install --only-binary=numpy,scipy numpy scipy" + +[tool.cibuildwheel.linux] +before-all = "yum install -y wget && {package}/tools/install_rust.sh" +environment = 'PATH="$PATH:$HOME/.cargo/bin"' diff --git a/qiskit/__init__.py b/qiskit/__init__.py index 24aa8d4ed806..45bf6fa49858 100644 --- a/qiskit/__init__.py +++ b/qiskit/__init__.py @@ -18,6 +18,15 @@ import sys import warnings +import qiskit._accelerate + +# Globally define compiled modules. The normal import mechanism will not +# find compiled submodules in _accelerate because it relies on file paths; +# manually define them on import so people can directly import +# qiskit._accelerate.* submodules and not have to rely on attribute access +sys.modules["qiskit._accelerate.stochastic_swap"] = qiskit._accelerate.stochastic_swap + + # qiskit errors operator from qiskit.exceptions import QiskitError, MissingOptionalLibraryError diff --git a/qiskit/transpiler/passes/routing/cython/__init__.py b/qiskit/transpiler/passes/routing/cython/__init__.py deleted file mode 100644 index 29d444f08100..000000000000 --- a/qiskit/transpiler/passes/routing/cython/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2017, 2018. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. 
-# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. - -"""Module containing transpiler Cython code.""" diff --git a/qiskit/transpiler/passes/routing/cython/stochastic_swap/__init__.py b/qiskit/transpiler/passes/routing/cython/stochastic_swap/__init__.py deleted file mode 100644 index 5a4bde09e943..000000000000 --- a/qiskit/transpiler/passes/routing/cython/stochastic_swap/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2017, 2018. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. - -"""Module containing Cython code for StochasticSwap mapper.""" diff --git a/qiskit/transpiler/passes/routing/cython/stochastic_swap/swap_trial.pyx b/qiskit/transpiler/passes/routing/cython/stochastic_swap/swap_trial.pyx deleted file mode 100644 index 0647cfbd094d..000000000000 --- a/qiskit/transpiler/passes/routing/cython/stochastic_swap/swap_trial.pyx +++ /dev/null @@ -1,194 +0,0 @@ -#!python -#cython: language_level = 3 -#distutils: language = c++ - -# This code is part of Qiskit. -# -# (C) Copyright IBM 2017, 2018. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. 
-# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. - -cimport cython -from libcpp.unordered_set cimport unordered_set as cset -from .utils cimport NLayout, EdgeCollection - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef double compute_cost(const double[:, ::1] dist, - unsigned int * logic_to_phys, - int[::1] gates, unsigned int num_gates) nogil: - """ Computes the cost (distance) of a logical to physical mapping. - - Args: - dist (ndarray): An array of doubles that specifies the distance. - logic_to_phys (int *): Pointer to logical to physical array. - gates (ndarray): Array of ints giving gates in layer. - num_gates (int): The number of gates (length of gates//2). - - Returns: - double: The distance calculated. - """ - cdef unsigned int ii, jj, kk - cdef double cost = 0.0 - for kk in range(num_gates): - ii = logic_to_phys[gates[2*kk]] - jj = logic_to_phys[gates[2*kk+1]] - cost += dist[ii,jj] - return cost - -@cython.nonecheck(False) -@cython.boundscheck(False) -@cython.wraparound(False) -cdef compute_random_scaling(double[:, ::1] scale, const double[:, ::1] cdist2, - double * rand, unsigned int num_qubits): - """ Computes the symmetric random scaling (perturbation) matrix, - and places the values in the 'scale' array. - - Args: - scale (ndarray): An array of doubles where the values are to be stored. - cdist2 (ndarray): Array representing the coupling map distance squared. - rand (double *): Array of rands of length num_qubits*(num_qubits+1)//2. - num_qubits (int): Number of physical qubits. 
- """ - cdef size_t ii, jj, idx=0 - for ii in range(num_qubits): - for jj in range(ii): - scale[ii,jj] = rand[idx]*cdist2[ii,jj] - scale[jj,ii] = scale[ii,jj] - idx += 1 - - -@cython.nonecheck(False) -@cython.boundscheck(False) -@cython.wraparound(False) -def swap_trial(int num_qubits, NLayout int_layout, int[::1] int_qubit_subset, - int[::1] gates, const double[:, ::1] cdist2, - const double[:, ::1] cdist, - int[::1] edges, double[:, ::1] scale, object rng): - """ A single iteration of the tchastic swap mapping routine. - - Args: - num_qubits (int): The number of physical qubits. - int_layout (NLayout): The numeric (integer) representation of - the initial_layout. - int_qubit_subset (ndarray): Int ndarray listing qubits in set. - gates (ndarray): Int array with integers giving qubits on which - two-qubits gates act on. - cdist2 (ndarray): Array of doubles that gives the square of the - distance graph. - cdist (ndarray): Array of doubles that gives the distance graph. - edges (ndarray): Int array of edges in coupling map. - scale (ndarray): A double array that holds the perturbed cdist2 array. - rng (default_rng): An instance of the NumPy default_rng. - - Returns: - double: Best distance achieved in this trial. - EdgeCollection: Collection of optimal edges found. - NLayout: The optimal layout found. - int: The number of depth steps required in mapping. 
- """ - cdef EdgeCollection opt_edges = EdgeCollection() - cdef NLayout optimal_layout, new_layout, trial_layout = int_layout.copy() - - cdef unsigned int num_gates = gates.shape[0]//2 - cdef unsigned int num_edges = edges.shape[0]//2 - - cdef unsigned int need_copy, cost_reduced - cdef unsigned int depth_step = 1 - cdef unsigned int depth_max = 2 * num_qubits + 1 - cdef double min_cost, new_cost, dist - - cdef unsigned int start_edge, end_edge, start_qubit, end_qubit - cdef unsigned int optimal_start, optimal_end, optimal_start_qubit, optimal_end_qubit - - cdef size_t idx - - # Compute randomized distance - cdef double[::1] rand = 1.0 + rng.normal(0.0, 1.0/num_qubits, - size=num_qubits*(num_qubits+1)//2) - - compute_random_scaling(scale, cdist2, &rand[0], num_qubits) - - # Convert int qubit array to c++ set - cdef cset[unsigned int] qubit_set - cdef cset[unsigned int] input_qubit_set - - for idx in range(int_qubit_subset.shape[0]): - input_qubit_set.insert(int_qubit_subset[idx]) - - # Loop over depths from 1 up to a maximum depth - while depth_step < depth_max: - qubit_set = input_qubit_set - # While there are still qubits available - while not qubit_set.empty(): - # Compute the objective function - min_cost = compute_cost(scale, trial_layout.logic_to_phys, - gates, num_gates) - # Try to decrease objective function - cost_reduced = 0 - - # Loop over edges of coupling graph - need_copy = 1 - for idx in range(num_edges): - start_edge = edges[2*idx] - end_edge = edges[2*idx+1] - start_qubit = trial_layout.phys_to_logic[start_edge] - end_qubit = trial_layout.phys_to_logic[end_edge] - # Are the qubits available? 
- if qubit_set.count(start_qubit) and qubit_set.count(end_qubit): - # Try this edge to reduce the cost - if need_copy: - new_layout = trial_layout.copy() - need_copy = 0 - new_layout.swap(start_edge, end_edge) - # Compute the objective function - new_cost = compute_cost(scale, new_layout.logic_to_phys, - gates, num_gates) - # Record progress if we succeed - if new_cost < min_cost: - cost_reduced = True - min_cost = new_cost - optimal_layout = new_layout - optimal_start = start_edge - optimal_end = end_edge - optimal_start_qubit = start_qubit - optimal_end_qubit = end_qubit - need_copy = 1 - else: - new_layout.swap(start_edge, end_edge) - - # After going over all edges - # Were there any good swap choices? - if cost_reduced: - qubit_set.erase(optimal_start_qubit) - qubit_set.erase(optimal_end_qubit) - trial_layout = optimal_layout - opt_edges.add(optimal_start, optimal_end) - else: - break - - # We have either run out of swap pairs to try or - # failed to improve the cost. - - # Compute the coupling graph distance - dist = compute_cost(cdist, trial_layout.logic_to_phys, - gates, num_gates) - # If all gates can be applied now, we are finished. - # Otherwise we need to consider a deeper swap circuit - if dist == num_gates: - break - - # Increment the depth - depth_step += 1 - - # Either we have succeeded at some depth d < dmax or failed - dist = compute_cost(cdist, trial_layout.logic_to_phys, - gates, num_gates) - - return dist, opt_edges, trial_layout, depth_step diff --git a/qiskit/transpiler/passes/routing/cython/stochastic_swap/utils.pxd b/qiskit/transpiler/passes/routing/cython/stochastic_swap/utils.pxd deleted file mode 100644 index f41e16fcea63..000000000000 --- a/qiskit/transpiler/passes/routing/cython/stochastic_swap/utils.pxd +++ /dev/null @@ -1,43 +0,0 @@ -#!python -#cython: language_level = 3, cdivision = True, nonecheck = False -#distutils: language = c++ - -# This code is part of Qiskit. -# -# (C) Copyright IBM 2017, 2018. 
-# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. - -from libcpp.vector cimport vector - -# Numeric layout -------------------------------------------------------------- -cdef class NLayout: - cdef: - unsigned int l2p_len - unsigned int p2l_len - unsigned int * logic_to_phys - unsigned int * phys_to_logic - - # Methods - cdef NLayout copy(self) - cdef void swap(self, unsigned int idx1, unsigned int idx2) - cpdef object to_layout(self, object dag) - - -cpdef NLayout nlayout_from_layout(object layout, - dict qubit_indices, - unsigned int logical_qubits, - unsigned int physical_qubits) - - -# Edge collection ------------------------------------------------------------- -cdef class EdgeCollection: - cdef vector[unsigned int] _edges - - cpdef void add(self, unsigned int edge_start, unsigned int edge_end) diff --git a/qiskit/transpiler/passes/routing/cython/stochastic_swap/utils.pyx b/qiskit/transpiler/passes/routing/cython/stochastic_swap/utils.pyx deleted file mode 100644 index f60574f10f1a..000000000000 --- a/qiskit/transpiler/passes/routing/cython/stochastic_swap/utils.pyx +++ /dev/null @@ -1,188 +0,0 @@ -#!python -#cython: language_level = 3 -#distutils: language = c++ - -# This code is part of Qiskit. -# -# (C) Copyright IBM 2017, 2018. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. 
-# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. - -cimport cython -import numpy as np -from libc.stdlib cimport calloc, free -from libcpp.vector cimport vector - -from qiskit.transpiler.layout import Layout -from qiskit.circuit import Qubit - -cdef class EdgeCollection: - """ A simple contain that contains a C++ vector - representing edges in the coupling map that are - found to be optimal by the swap mapper. This allows - us to keep the vector alive. - """ - cpdef void add(self, unsigned int edge_start, unsigned int edge_end): - """ Add two edges, in order, to the collection. - - Args: - edge_start (int): The beginning edge. - edge_end (int): The end of the edge. - """ - self._edges.push_back(edge_start) - self._edges.push_back(edge_end) - - @property - def size(self): - """ The size of the edge collection. - Returns: - int: Size of the edge collection. - """ - return self._edges.size() - - @cython.boundscheck(False) - def edges(self): - """ Returns the vector of edges as a NumPy array. - Returns: - ndarray: Int array of edges. - """ - cdef size_t kk - out = np.zeros(self._edges.size(), dtype=np.uint32) - for kk in range(self._edges.size()): - out[kk] = self._edges[kk] - return out - - -cdef class NLayout: - """ A Numeric representation of a Qiskit Layout object. - Here all qubit layouts are stored as int arrays. - """ - def __cinit__(self, unsigned int num_logical, - unsigned int num_physical): - """ Init object. - Args: - num_logical (int): Number of logical qubits. - num_physical (int): Number of physical qubits. - """ - self.l2p_len = num_logical - self.p2l_len = num_physical - self.logic_to_phys = calloc(num_logical, - sizeof(unsigned int)) - self.phys_to_logic = calloc(num_physical, - sizeof(unsigned int)) - - def __dealloc__(self): - """ Clears the pointers when finished. 
- """ - if self.logic_to_phys is not NULL: - free(self.logic_to_phys) - self.logic_to_phys = NULL - if self.phys_to_logic is not NULL: - free(self.phys_to_logic) - self.phys_to_logic = NULL - - @property - def logic_to_phys(self): - """ The array mapping logical to physical qubits. - Returns: - ndarray: Int array of logical to physical mappings. - """ - cdef size_t kk - out = np.zeros(self.l2p_len, dtype=np.int32) - for kk in range(self.l2p_len): - out[kk] = self.logic_to_phys[kk] - return out - - @property - def phys_to_logic(self): - """ The array mapping physical to logical qubits. - Returns: - ndarray: Int array of physical to logical mappings. - """ - cdef size_t kk - out = np.zeros(self.p2l_len, dtype=np.int32) - for kk in range(self.p2l_len): - out[kk] = self.phys_to_logic[kk] - return out - - @cython.boundscheck(False) - cdef NLayout copy(self): - """ Returns a copy of the layout. - - Returns: - NLayout: A copy of the layout. - """ - cdef NLayout out = NLayout(self.l2p_len, self.p2l_len) - cdef size_t kk - for kk in range(self.l2p_len): - out.logic_to_phys[kk] = self.logic_to_phys[kk] - for kk in range(self.p2l_len): - out.phys_to_logic[kk] = self.phys_to_logic[kk] - return out - - @cython.boundscheck(False) - cdef void swap(self, unsigned int idx1, unsigned int idx2): - """ Swaps two indices in the Layout - - Args: - idx1 (int): Index 1. - idx2 (int): Index 2. - """ - cdef unsigned int temp1, temp2 - temp1 = self.phys_to_logic[idx1] - temp2 = self.phys_to_logic[idx2] - self.phys_to_logic[idx1] = temp2 - self.phys_to_logic[idx2] = temp1 - self.logic_to_phys[self.phys_to_logic[idx1]] = idx1 - self.logic_to_phys[self.phys_to_logic[idx2]] = idx2 - - @cython.boundscheck(False) - cpdef object to_layout(self, object qregs): - """ Converts numeric layout back to Qiskit Layout object. - - Args: - qregs (OrderedDict): An ordered dict of Qubit instances. - - Returns: - Layout: The corresponding Qiskit Layout object. 
- """ - out = Layout() - cdef unsigned int main_idx = 0 - cdef size_t idx - for qreg in qregs.values(): - for idx in range(qreg.size): - out[qreg[idx]] = self.logic_to_phys[main_idx] - main_idx += 1 - return out - - -cpdef NLayout nlayout_from_layout(object layout, - dict qubit_indices, - unsigned int logical_qubits, - unsigned int physical_qubits): - """ Converts Qiskit Layout object to numerical NLayout. - - Args: - layout (Layout): A Qiskit Layout instance. - qubit_indices (dict): Dict of Qubit instances to an integer index. - logical_qubits (int): Number of logical qubits. - physical_qubits (int): Number of physical qubits. - Returns: - NLayout: The corresponding numerical layout. - """ - - cdef NLayout out = NLayout(logical_qubits, physical_qubits) - cdef object key, val - cdef dict merged_dict = {**layout._p2v, **layout._v2p} - for key, val in merged_dict.items(): - if isinstance(key, Qubit): - out.logic_to_phys[qubit_indices[key]] = val - else: - out.phys_to_logic[key] = qubit_indices[val] - return out diff --git a/qiskit/transpiler/passes/routing/stochastic_swap.py b/qiskit/transpiler/passes/routing/stochastic_swap.py index b1ffe0cd6e72..f9c534ce1ceb 100644 --- a/qiskit/transpiler/passes/routing/stochastic_swap.py +++ b/qiskit/transpiler/passes/routing/stochastic_swap.py @@ -14,7 +14,6 @@ import logging from math import inf -from collections import OrderedDict import numpy as np from qiskit.circuit.quantumregister import QuantumRegister @@ -24,12 +23,7 @@ from qiskit.circuit.library.standard_gates import SwapGate from qiskit.transpiler.layout import Layout -# pylint: disable=no-name-in-module -from .cython.stochastic_swap.utils import nlayout_from_layout - -# pylint: disable=no-name-in-module -from .cython.stochastic_swap.swap_trial import swap_trial - +from qiskit._accelerate import stochastic_swap as stochastic_swap_rs logger = logging.getLogger(__name__) @@ -99,10 +93,7 @@ def run(self, dag): self._qubit_indices = {bit: idx for idx, bit in 
enumerate(dag.qubits)} self.qregs = dag.qregs - if self.seed is None: - self.seed = np.random.randint(0, np.iinfo(np.int32).max) - self.rng = np.random.default_rng(self.seed) - logger.debug("StochasticSwap default_rng seeded with seed=%s", self.seed) + logger.debug("StochasticSwap rng seeded with seed=%s", self.seed) self.coupling_map.compute_distance_matrix() new_dag = self._mapper(dag, self.coupling_map, trials=self.trials) return new_dag @@ -146,9 +137,7 @@ def _layer_permutation(self, layer_partition, layout, qubit_subset, coupling, tr logger.debug("layer_permutation: trials = %s", trials) # The input dag is on a flat canonical register - # TODO: cleanup the code that is general for multiple qregs below canonical_register = QuantumRegister(len(layout), "q") - qregs = OrderedDict({canonical_register.name: canonical_register}) gates = [] # list of lists of tuples [[(register, index), ...], ...] for gate_args in layer_partition: @@ -177,55 +166,37 @@ def _layer_permutation(self, layer_partition, layout, qubit_subset, coupling, tr best_layout = None # initialize best final layout cdist2 = coupling._dist_matrix**2 - # Scaling matrix - scale = np.zeros((num_qubits, num_qubits)) - int_qubit_subset = np.fromiter( (self._qubit_indices[bit] for bit in qubit_subset), - dtype=np.int32, + dtype=np.uint64, count=len(qubit_subset), ) int_gates = np.fromiter( (self._qubit_indices[bit] for gate in gates for bit in gate), - dtype=np.int32, + dtype=np.uint64, count=2 * len(gates), ) - int_layout = nlayout_from_layout(layout, self._qubit_indices, num_qubits, coupling.size()) + layout_mapping = {self._qubit_indices[k]: v for k, v in layout.get_virtual_bits().items()} + int_layout = stochastic_swap_rs.NLayout(layout_mapping, num_qubits, coupling.size()) trial_circuit = DAGCircuit() # SWAP circuit for slice of swaps in this trial trial_circuit.add_qubits(layout.get_virtual_bits()) - edges = np.asarray(coupling.get_edges(), dtype=np.int32).ravel() + edges = 
np.asarray(coupling.get_edges(), dtype=np.uint64).ravel() cdist = coupling._dist_matrix - for trial in range(trials): - logger.debug("layer_permutation: trial %s", trial) - # This is one Trial -------------------------------------- - dist, optim_edges, trial_layout, depth_step = swap_trial( - num_qubits, - int_layout, - int_qubit_subset, - int_gates, - cdist2, - cdist, - edges, - scale, - self.rng, - ) - - logger.debug("layer_permutation: final distance for this trial = %s", dist) - if dist == len(gates) and depth_step < best_depth: - logger.debug("layer_permutation: got circuit with improved depth %s", depth_step) - best_edges = optim_edges - best_layout = trial_layout - best_depth = min(best_depth, depth_step) - - # Break out of trial loop if we found a depth 1 circuit - # since we can't improve it further - if best_depth == 1: - break - + best_edges, best_layout, best_depth = stochastic_swap_rs.swap_trials( + trials, + num_qubits, + int_layout, + int_qubit_subset, + int_gates, + cdist, + cdist2, + edges, + seed=self.seed, + ) # If we have no best circuit for this layer, all of the # trials have failed if best_layout is None: @@ -233,7 +204,7 @@ def _layer_permutation(self, layer_partition, layout, qubit_subset, coupling, tr return False, None, None, None edges = best_edges.edges() - for idx in range(best_edges.size // 2): + for idx in range(len(edges) // 2): swap_src = self.trivial_layout._p2v[edges[2 * idx]] swap_tgt = self.trivial_layout._p2v[edges[2 * idx + 1]] trial_circuit.apply_operation_back(SwapGate(), [swap_src, swap_tgt], []) @@ -241,7 +212,9 @@ def _layer_permutation(self, layer_partition, layout, qubit_subset, coupling, tr # Otherwise, we return our result for this layer logger.debug("layer_permutation: success!") - best_lay = best_layout.to_layout(qregs) + layout_mapping = best_layout.layout_mapping() + + best_lay = Layout({best_circuit.qubits[k]: v for (k, v) in layout_mapping}) return True, best_circuit, best_depth, best_lay def 
_layer_update(self, dag, layer, best_layout, best_depth, best_circuit): diff --git a/releasenotes/notes/multithreaded-stochastic-swap-6c2f13d7bd566284.yaml b/releasenotes/notes/multithreaded-stochastic-swap-6c2f13d7bd566284.yaml new file mode 100644 index 000000000000..a2fd09dc7e82 --- /dev/null +++ b/releasenotes/notes/multithreaded-stochastic-swap-6c2f13d7bd566284.yaml @@ -0,0 +1,58 @@ +--- +features: + - | + The internals of the :class:`.StochasticSwap` algorithm have been reimplemented + to be multithreaded and are now written in the + `Rust <https://www.rust-lang.org/>`__ programming language instead of Cython. + This significantly increases the run time performance of the compiler pass + and by extension :func:`~.transpile` when run with ``optimization_level`` 0, + 1, and 2. By default the pass will use up to the number of logical CPUs on your + local system but you can control the number of threads used by the pass by setting + the ``RAYON_NUM_THREADS`` environment variable to an integer value. For example, + setting ``RAYON_NUM_THREADS=4`` will run the :class:`.StochasticSwap` with 4 + threads. + - | + A new environment variable ``QISKIT_FORCE_THREADS`` is available for users to + directly control whether potentially multithreaded portions of qiskit's code + will run in multiple threads. Currently this is only used by the + :class:`~.StochasticSwap` transpiler pass but it will likely be used by other + parts of Qiskit in the future. When this env variable is set to ``TRUE`` any + multithreaded code in Qiskit Terra will always use multiple threads regardless + of any other runtime conditions that might have otherwise caused the function + to use a single threaded variant. 
For example, in :class:`~.StochasticSwap` if + the pass is being run as part of a :func:`~.transpile` call with > 1 circuit + that is being executed in parallel with ``multiprocessing`` via + :func:`~.parallel_map` the :class:`~.StochasticSwap` will not use multiple + threads to avoid potentially oversubscribing CPU resources. However, if you'd + like to use multiple threads in the pass along with multiple processes you + can set ``QISKIT_FORCE_THREADS=TRUE``. +upgrade: + - | + The :class:`.StochasticSwap` transpiler pass may return different results with + the same seed value set. This is due to the internal rewrite of the transpiler + pass to improve runtime performance. However, this means that if you ran + :func:`~.transpile` with ``optimization_level`` 0, 1 (the default), or 2 with a + value set for ``seed_transpiler`` you may get an output with a different swap + mapping present after upgrading to Qiskit Terra 0.20.0. + - | + To build Qiskit Terra from source a `Rust <https://www.rust-lang.org/>`__ + compiler is now needed. This is due to the internal rewrite of the + :class:`.StochasticSwap` transpiler pass which greatly improves the runtime + performance of the transpiler. The rust compiler can easily be installed + using rustup, which can be found here: https://rustup.rs/ +issues: + - | + When running :func:`.parallel_map` (which is done internally by + performance sensitive functions such as :func:`.transpile` and + :func:`.assemble`) in a subprocess launched outside of + :func:`.parallel_map` it is possible that the parallel dispatch performed + inside :func:`.parallel_map` will hang and never return. + This is due to upstream issues in CPython (see: + https://bugs.python.org/issue40379 for more details) around the default + method to launch subprocesses on Linux and macOS (with Python 3.7). 
If you + encounter this you have two options: you can either remove the nested + parallel processes as calling :func:`.parallel_map` from a main process + should work fine, or you can manually call the CPython standard library + ``multiprocessing`` module to perform similar parallel dispatch from a + subprocess but use the ``"spawn"`` or ``"forkserver"`` launch methods to + avoid the potential to have things get stuck and never return. diff --git a/requirements-dev.txt b/requirements-dev.txt index 7d0d367d5938..a83d45b91736 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ +setuptools-rust coverage>=4.4.0 hypothesis>=4.24.3 ipython<7.22.0 diff --git a/setup.py b/setup.py index 5bf3a0ebade4..b40376fbc29f 100755 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ import re import sys from setuptools import setup, find_packages, Extension +from setuptools_rust import Binding, RustExtension try: from Cython.Build import cythonize @@ -25,17 +26,12 @@ subprocess.call([sys.executable, "-m", "pip", "install", "Cython>=0.27.1"]) from Cython.Build import cythonize + with open("requirements.txt") as f: REQUIREMENTS = f.read().splitlines() # Add Cython extensions here CYTHON_EXTS = { - "qiskit/transpiler/passes/routing/cython/stochastic_swap/utils": ( - "qiskit.transpiler.passes.routing.cython.stochastic_swap.utils" - ), - "qiskit/transpiler/passes/routing/cython/stochastic_swap/swap_trial": ( - "qiskit.transpiler.passes.routing.cython.stochastic_swap.swap_trial" - ), "qiskit/quantum_info/states/cython/exp_value": "qiskit.quantum_info.states.cython.exp_value", } @@ -139,6 +135,7 @@ "Source Code": "https://github.com/Qiskit/qiskit-terra", }, ext_modules=cythonize(EXT_MODULES), + rust_extensions=[RustExtension("qiskit._accelerate", "Cargo.toml", binding=Binding.PyO3)], zip_safe=False, entry_points={ "qiskit.unitary_synthesis": [ diff --git a/src/edge_collections.rs b/src/edge_collections.rs new file mode 100644 index 000000000000..103d0db5d4cf --- /dev/null 
+++ b/src/edge_collections.rs @@ -0,0 +1,67 @@ +// This code is part of Qiskit. +// +// (C) Copyright IBM 2022 +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE.txt file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. + +use numpy::IntoPyArray; +use pyo3::prelude::*; +use pyo3::Python; + +/// A simple container that contains a vector representing edges in the +/// coupling map that are found to be optimal by the swap mapper. +#[pyclass(module = "qiskit._accelerate.stochastic_swap")] +#[pyo3(text_signature = "(/)")] +#[derive(Clone, Debug)] +pub struct EdgeCollection { + pub edges: Vec, +} + +impl Default for EdgeCollection { + fn default() -> Self { + Self::new() + } +} + +#[pymethods] +impl EdgeCollection { + #[new] + pub fn new() -> Self { + EdgeCollection { edges: Vec::new() } + } + + /// Add two edges, in order, to the collection. + /// + /// Args: + /// edge_start (int): The beginning edge. + /// edge_end (int): The end of the edge. + #[pyo3(text_signature = "(self, edge_start, edge_end, /)")] + pub fn add(&mut self, edge_start: usize, edge_end: usize) { + self.edges.push(edge_start); + self.edges.push(edge_end); + } + + /// Return the numpy array of edges + /// + /// The out array is the flattened edge list from the coupling graph. + /// For example, if the edge list were ``[(0, 1), (1, 2), (2, 3)]`` the + /// output array here would be ``[0, 1, 1, 2, 2, 3]``. 
+ #[pyo3(text_signature = "(self, /)")] + pub fn edges(&self, py: Python) -> PyObject { + self.edges.clone().into_pyarray(py).into() + } + + fn __getstate__(&self) -> Vec { + self.edges.clone() + } + + fn __setstate__(&mut self, state: Vec) { + self.edges = state + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000000..ff13bea71783 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,27 @@ +// This code is part of Qiskit. +// +// (C) Copyright IBM 2022 +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE.txt file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. + +use pyo3::prelude::*; +use pyo3::wrap_pymodule; +use pyo3::Python; + +mod edge_collections; +mod nlayout; +mod stochastic_swap; + +use crate::stochastic_swap::PyInit_stochastic_swap; + +#[pymodule] +fn _accelerate(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pymodule!(stochastic_swap))?; + Ok(()) +} diff --git a/src/nlayout.rs b/src/nlayout.rs new file mode 100644 index 000000000000..53675d07f521 --- /dev/null +++ b/src/nlayout.rs @@ -0,0 +1,89 @@ +// This code is part of Qiskit. +// +// (C) Copyright IBM 2022 +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE.txt file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. 
+ +use pyo3::prelude::*; + +use hashbrown::HashMap; + +/// An unsigned integer Vector based layout class +/// +/// This class tracks the layout (or mapping between virtual qubits in the +/// circuit and physical qubits on the physical device) efficiently +/// +/// Args: +/// qubit_indices (dict): A dictionary mapping the virtual qubit index in the circuit to the +/// physical qubit index on the coupling graph. +/// logical_qubits (int): The number of logical qubits in the layout +/// physical_qubits (int): The number of physical qubits in the layout +#[pyclass(module = "qiskit._accelerate.stochastic_swap")] +#[pyo3(text_signature = "(qubit_indices, logical_qubits, physical_qubits, /)")] +#[derive(Clone, Debug)] +pub struct NLayout { + pub logic_to_phys: Vec, + pub phys_to_logic: Vec, +} + +impl NLayout { + pub fn swap(&mut self, idx1: usize, idx2: usize) { + self.phys_to_logic.swap(idx1, idx2); + self.logic_to_phys[self.phys_to_logic[idx1]] = idx1; + self.logic_to_phys[self.phys_to_logic[idx2]] = idx2; + } +} + +#[pymethods] +impl NLayout { + #[new] + fn new( + qubit_indices: HashMap, + logical_qubits: usize, + physical_qubits: usize, + ) -> Self { + let mut res = NLayout { + logic_to_phys: vec![std::usize::MAX; logical_qubits], + phys_to_logic: vec![std::usize::MAX; physical_qubits], + }; + for (key, value) in qubit_indices { + res.logic_to_phys[key] = value; + res.phys_to_logic[value] = key; + } + res + } + + fn __getstate__(&self) -> [Vec; 2] { + [self.logic_to_phys.clone(), self.phys_to_logic.clone()] + } + + fn __setstate__(&mut self, state: [Vec; 2]) { + self.logic_to_phys = state[0].clone(); + self.phys_to_logic = state[1].clone(); + } + + /// Return the layout mapping + /// + /// .. note:: + /// + /// this copies the data from Rust to Python and has linear + /// overhead based on the number of qubits. + /// + /// Returns: + /// list: A list of 2 element lists in the form: + /// ``[[logical_qubit, physical_qubit], ...]``. 
Where the logical qubit + /// is the index of the qubit in the circuit. + /// + #[pyo3(text_signature = "(self, /)")] + fn layout_mapping(&self) -> Vec<[usize; 2]> { + (0..self.logic_to_phys.len()) + .map(|i| [i, self.logic_to_phys[i]]) + .collect() + } +} diff --git a/src/stochastic_swap.rs b/src/stochastic_swap.rs new file mode 100644 index 000000000000..033335024831 --- /dev/null +++ b/src/stochastic_swap.rs @@ -0,0 +1,352 @@ +// This code is part of Qiskit. +// +// (C) Copyright IBM 2022 +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE.txt file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. + +// Needed to pass shared state between functions +// closures don't work because of recursion +#![allow(clippy::too_many_arguments)] +#![allow(clippy::type_complexity)] + +use std::env; +use std::sync::RwLock; + +use hashbrown::HashSet; + +use ndarray::prelude::*; +use numpy::{PyReadonlyArray1, PyReadonlyArray2}; +use rayon::prelude::*; + +use pyo3::prelude::*; +use pyo3::wrap_pyfunction; +use pyo3::Python; + +use rand::prelude::*; +use rand_distr::{Distribution, Normal}; +use rand_pcg::Pcg64Mcg; + +use crate::edge_collections::EdgeCollection; +use crate::nlayout::NLayout; + +#[inline] +fn compute_cost( + dist: &ArrayView2, + layout: &NLayout, + gates: &[usize], + num_gates: usize, +) -> f64 { + (0..num_gates) + .map(|kk| { + let ii = layout.logic_to_phys[gates[2 * kk]]; + let jj = layout.logic_to_phys[gates[2 * kk + 1]]; + dist[[ii, jj]] + }) + .sum() +} + +/// Computes the symmetric random scaling (perturbation) matrix, +/// and places the values in the 'scale' array. 
+/// +/// Args: +/// scale (ndarray): An array of doubles where the values are to be stored. +/// cdist2 (ndarray): Array representing the coupling map distance squared. +/// rand (double *): Array of rands of length num_qubits*(num_qubits+1)//2. +/// num_qubits (int): Number of physical qubits. +#[inline] +fn compute_random_scaling( + scale: &mut Array2, + cdist2: &ArrayView2, + rand: &[f64], + num_qubits: usize, +) { + let mut idx: usize = 0; + for ii in 0..num_qubits { + for jj in 0..ii { + scale[[ii, jj]] = rand[idx] * cdist2[[ii, jj]]; + scale[[jj, ii]] = scale[[ii, jj]]; + idx += 1 + } + } +} + +fn swap_trial( + num_qubits: usize, + int_layout: &NLayout, + int_qubit_subset: &[usize], + gates: &[usize], + cdist: ArrayView2, + cdist2: ArrayView2, + edges: &[usize], + seed: u64, + trial_num: u64, + locked_best_possible: Option<&RwLock<&mut Option<(u64, f64, EdgeCollection, NLayout)>>>, +) -> Option<(f64, EdgeCollection, NLayout, usize)> { + if let Some(locked_best_possible) = locked_best_possible { + // Return fast if a depth == 1 solution was already found in another parallel + // trial. However for deterministic results in cases of multiple depth == 1 + // solutions still search for a solution if this trial number is less than + // the found solution (this mirrors the previous behavior of a serial loop). 
+ let best_possible = locked_best_possible.read().unwrap(); + if best_possible.is_some() && best_possible.as_ref().unwrap().0 < trial_num { + return None; + } + } + let mut opt_edges = EdgeCollection::new(); + let mut trial_layout = int_layout.clone(); + let mut optimal_layout = int_layout.clone(); + + let num_gates: usize = gates.len() / 2; + let num_edges: usize = edges.len() / 2; + + let mut cost_reduced; + let mut depth_step: usize = 1; + let depth_max: usize = 2 * num_qubits + 1; + let mut min_cost: f64; + let mut new_cost: f64; + let mut dist: f64; + + let mut optimal_start: usize = std::usize::MAX; + let mut optimal_end: usize = std::usize::MAX; + let mut optimal_start_qubit = std::usize::MAX; + let mut optimal_end_qubit = std::usize::MAX; + + let mut scale = Array2::zeros((num_qubits, num_qubits)); + + let distribution = Normal::new(1.0, 1.0 / num_qubits as f64).unwrap(); + let mut rng: Pcg64Mcg = Pcg64Mcg::seed_from_u64(seed); + let rand_arr: Vec = distribution + .sample_iter(&mut rng) + .take(num_qubits * (num_qubits + 1) / 2) + .collect(); + + compute_random_scaling(&mut scale, &cdist2, &rand_arr, num_qubits); + + let input_qubit_set: HashSet = int_qubit_subset.iter().copied().collect(); + + while depth_step < depth_max { + let mut qubit_set = input_qubit_set.clone(); + while !qubit_set.is_empty() { + min_cost = compute_cost(&scale.view(), &trial_layout, gates, num_gates); + // Try to decrease the objective function + cost_reduced = false; + for idx in 0..num_edges { + let start_edge = edges[2 * idx]; + let end_edge = edges[2 * idx + 1]; + let start_qubit = trial_layout.phys_to_logic[start_edge]; + let end_qubit = trial_layout.phys_to_logic[end_edge]; + if qubit_set.contains(&start_qubit) && qubit_set.contains(&end_qubit) { + // Try this edge to reduce cost + trial_layout.swap(start_edge, end_edge); + // compute objective function + new_cost = compute_cost(&scale.view(), &trial_layout, gates, num_gates); + // record progress if we succeed + if new_cost < 
min_cost { + cost_reduced = true; + min_cost = new_cost; + optimal_layout = trial_layout.clone(); + optimal_start = start_edge; + optimal_end = end_edge; + optimal_start_qubit = start_qubit; + optimal_end_qubit = end_qubit; + } + trial_layout.swap(start_edge, end_edge); + } + } + // After going over all edges + // Were there any good swap choices? + if cost_reduced { + qubit_set.remove(&optimal_start_qubit); + qubit_set.remove(&optimal_end_qubit); + trial_layout = optimal_layout.clone(); + opt_edges.add(optimal_start, optimal_end); + } else { + break; + } + } + // We have either run out of swap pairs to try or failed to improve + // the cost + + // Compute the coupling graph distance + dist = compute_cost(&cdist, &trial_layout, gates, num_gates); + // If all gates can be applied now we're finished. + // Otherwise we need to consider a deeper swap circuit + if dist as usize == num_gates { + break; + } + // increment the depth + depth_step += 1; + } + // Either we have succeeded at some depth d < d_max or failed + dist = compute_cost(&cdist, &trial_layout, gates, num_gates); + if let Some(locked_best_possible) = locked_best_possible { + if dist as usize == num_gates && depth_step == 1 { + let mut best_possible = locked_best_possible.write().unwrap(); + // In the case an ideal solution has already been found to preserve + // behavior consistent with the single threaded predecessor to this function + // we defer to the earlier trial + if best_possible.is_none() || best_possible.as_ref().unwrap().0 > trial_num { + **best_possible = Some((trial_num, dist, opt_edges, trial_layout)); + } + return None; + } + } + Some((dist, opt_edges, trial_layout, depth_step)) +} + +/// Run the random trials as part of the layer permutation used internally for +/// the stochastic swap algorithm. +/// +/// This function is multithreaded and will spawn a thread pool as part of its +/// execution. By default the number of threads will be equal to the number of +/// CPUs. 
You can tune the number of threads with the RAYON_NUM_THREADS +/// environment variable. For example, setting RAYON_NUM_THREADS=4 would limit +/// the thread pool to 4 threads. +/// +/// Args: +/// num_trials (int): The number of random trials to attempt +/// num_qubits (int): The number of qubits +/// int_layout (NLayout): The initial layout for the layer. The layout is a mapping +/// of virtual qubits to physical qubits in the coupling graph +/// int_qubit_subset (ndarray): A 1D array of qubit indices for the set of qubits in the +/// coupling map that we've chosen to map into. +/// int_gates (ndarray): A 1D array of qubit pairs that each 2 qubit gate operates on. +/// The pairs are flattened on the array so that each pair in the list of 2q gates +/// are adjacent in the array. For example, if the 2q interaction list was +/// ``[(0, 1), (2, 1), (3, 2)]``, the input here would be ``[0, 1, 2, 1, 3, 2]``. +/// cdist (ndarray): The distance matrix for the coupling graph of the target +/// backend +/// cdist2 (ndarray): The distance matrix squared for the coupling graph of the +/// target backend +/// edges (ndarray): A flattened 1d array of the edge list of the coupling graph. +/// The pairs are flattened on the array so that each node pair in the edge are +/// adjacent in the array. For example, if the edge list were ``[(0, 1), (1, 2), (2, 3)]`` +/// the input array here would be ``[0, 1, 1, 2, 2, 3]``. +/// seed (int): An optional seed for the rng used to generate the random perturbation +/// matrix used in each trial +/// Returns: +/// tuple: If a valid layout permutation is found a tuple of the form: +/// ``(edges, layout, depth)`` is returned. If a solution is not found the output +/// will be ``(None, None, max int)``. 
+#[pyfunction] +#[pyo3( + text_signature = "(num_trials, num_qubits, int_layout, int_qubit_subset, int_gates, cdist, cdist2, edges, /, seed=None)" +)] +pub fn swap_trials( + num_trials: u64, + num_qubits: usize, + int_layout: &NLayout, + int_qubit_subset: PyReadonlyArray1, + int_gates: PyReadonlyArray1, + cdist: PyReadonlyArray2, + cdist2: PyReadonlyArray2, + edges: PyReadonlyArray1, + seed: Option, +) -> PyResult<(Option, Option, usize)> { + let int_qubit_subset_arr = int_qubit_subset.as_slice()?; + let int_gates_arr = int_gates.as_slice()?; + let cdist_arr = cdist.as_array(); + let cdist2_arr = cdist2.as_array(); + let edges_arr = edges.as_slice()?; + let num_gates: usize = int_gates.len() / 2; + let mut best_possible: Option<(u64, f64, EdgeCollection, NLayout)> = None; + let locked_best_possible: RwLock<&mut Option<(u64, f64, EdgeCollection, NLayout)>> = + RwLock::new(&mut best_possible); + let outer_rng: Pcg64Mcg = match seed { + Some(seed) => Pcg64Mcg::seed_from_u64(seed), + None => Pcg64Mcg::from_entropy(), + }; + let seed_vec: Vec = outer_rng + .sample_iter(&rand::distributions::Standard) + .take(num_trials as usize) + .collect(); + // Run in parallel only if we're not already in a multiprocessing context + // unless force threads is set. 
+ let parallel_context = env::var("QISKIT_IN_PARALLEL") + .unwrap_or_else(|_| "FALSE".to_string()) + .to_uppercase() + == "TRUE"; + let force_threads = env::var("QISKIT_FORCE_THREADS") + .unwrap_or_else(|_| "FALSE".to_string()) + .to_uppercase() + == "TRUE"; + let run_in_parallel = !parallel_context || force_threads; + + let mut best_depth = std::usize::MAX; + let mut best_edges: Option = None; + let mut best_layout: Option = None; + if run_in_parallel { + let result: Vec> = (0..num_trials) + .into_par_iter() + .map(|trial_num| { + swap_trial( + num_qubits, + int_layout, + int_qubit_subset_arr, + int_gates_arr, + cdist_arr, + cdist2_arr, + edges_arr, + seed_vec[trial_num as usize], + trial_num, + Some(&locked_best_possible), + ) + }) + .collect(); + match best_possible { + Some((_trial_num, _dist, edges, layout)) => { + best_edges = Some(edges); + best_layout = Some(layout); + best_depth = 1; + } + None => { + for (dist, edges, layout, depth) in result.into_iter().flatten() { + if dist as usize == num_gates && depth < best_depth { + best_edges = Some(edges); + best_layout = Some(layout); + best_depth = depth; + } + } + } + }; + } else { + for trial_num in 0..num_trials { + let (dist, edges, layout, depth) = swap_trial( + num_qubits, + int_layout, + int_qubit_subset_arr, + int_gates_arr, + cdist_arr, + cdist2_arr, + edges_arr, + seed_vec[trial_num as usize], + trial_num, + None, + ) + .unwrap(); + if dist as usize == num_gates && depth < best_depth { + best_edges = Some(edges); + best_layout = Some(layout); + best_depth = depth; + if depth == 1 { + return Ok((best_edges, best_layout, best_depth)); + } + } + } + } + Ok((best_edges, best_layout, best_depth)) +} + +#[pymodule] +pub fn stochastic_swap(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(swap_trials))?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/test/python/qasm/TestsStochasticSwap_handle_measurement.qasm 
b/test/python/qasm/TestsStochasticSwap_handle_measurement.qasm index bc161f609758..a14feda051e2 100644 --- a/test/python/qasm/TestsStochasticSwap_handle_measurement.qasm +++ b/test/python/qasm/TestsStochasticSwap_handle_measurement.qasm @@ -5,11 +5,10 @@ creg c[4]; cx q[0],q[1]; h q[3]; measure q[2] -> c[2]; -swap q[1],q[2]; -swap q[0],q[1]; -cx q[3],q[2]; swap q[2],q[3]; cx q[2],q[1]; +swap q[0],q[1]; +measure q[0] -> c[1]; +cx q[2],q[1]; measure q[1] -> c[0]; -measure q[3] -> c[1]; measure q[2] -> c[3]; diff --git a/test/python/transpiler/test_stochastic_swap.py b/test/python/transpiler/test_stochastic_swap.py index d1e2a233bde9..8aed99218c6f 100644 --- a/test/python/transpiler/test_stochastic_swap.py +++ b/test/python/transpiler/test_stochastic_swap.py @@ -318,36 +318,30 @@ def test_overoptimization_case(self): expected.z(qr[2]) expected.y(qr[1]) expected.x(qr[0]) - expected.swap(qr[1], qr[2]) - expected.cx(qr[0], qr[2]) - expected.swap(qr[2], qr[3]) - expected.cx(qr[1], qr[2]) - expected.s(qr[3]) - expected.t(qr[1]) - expected.h(qr[2]) + expected.swap(qr[0], qr[2]) + expected.cx(qr[2], qr[1]) + expected.swap(qr[0], qr[2]) + expected.cx(qr[2], qr[3]) + expected.s(qr[1]) + expected.t(qr[2]) + expected.h(qr[3]) expected.measure(qr[0], cr[0]) - expected.swap(qr[1], qr[2]) - expected.cx(qr[3], qr[2]) - expected.measure(qr[1], cr[3]) - expected.measure(qr[3], cr[1]) + expected.cx(qr[1], qr[2]) + expected.measure(qr[3], cr[3]) + expected.measure(qr[1], cr[1]) expected.measure(qr[2], cr[2]) expected_dag = circuit_to_dag(expected) - # ┌───┐ ┌─┐ - # q_0: |0>─────────────┤ X ├──■──┤M├──────────────────────────────────────── - # ┌───┐ └───┘ │ └╥┘ ┌───┐ ┌───┐┌─┐ - # q_1: |0>─────┤ Y ├─X────────┼───╫───────────■──┤ T ├────────┤ X ├┤M├────── - # ┌───┐└───┘ │ ┌─┴─┐ ║ ┌─┴─┐└───┘┌───┐ └─┬─┘└╥┘┌─┐ - # q_2: |0>┤ Z ├──────X──────┤ X ├─╫──X──────┤ X ├─────┤ H ├─X───■───╫─┤M├─── - # └───┘ └───┘ ║ │ ┌───┐└───┘ └───┘ │ ║ └╥┘┌─┐ - # q_3: |0>────────────────────────╫──X─┤ S 
├────────────────X───────╫──╫─┤M├ - # ║ └───┘ ║ ║ └╥┘ - # c_0: 0 ════════════════════════╩═════════════════════════════════╬══╬══╬═ - # ║ ║ ║ - # c_1: 0 ══════════════════════════════════════════════════════════╬══╩══╬═ - # ║ ║ - # c_2: 0 ══════════════════════════════════════════════════════════╩═════╬═ - # ║ - # c_3: 0 ════════════════════════════════════════════════════════════════╩═ + # ┌───┐ ┌─┐ + # q_0: ┤ X ├─X───────X──────┤M├──────────────── + # ├───┤ │ ┌───┐ │ ┌───┐└╥┘ ┌─┐ + # q_1: ┤ Y ├─┼─┤ X ├─┼─┤ S ├─╫────────■──┤M├─── + # ├───┤ │ └─┬─┘ │ └───┘ ║ ┌───┐┌─┴─┐└╥┘┌─┐ + # q_2: ┤ Z ├─X───■───X───■───╫─┤ T ├┤ X ├─╫─┤M├ + # └───┘ ┌─┴─┐ ║ ├───┤└┬─┬┘ ║ └╥┘ + # q_3: ────────────────┤ X ├─╫─┤ H ├─┤M├──╫──╫─ + # └───┘ ║ └───┘ └╥┘ ║ ║ + # c: 4/══════════════════════╩════════╩═══╩══╩═ + # 0 3 1 2 # # Layout -- @@ -428,6 +422,7 @@ def test_congestion(self): circ.measure(qr[2], cr[2]) circ.measure(qr[3], cr[3]) dag = circuit_to_dag(circ) + # Input: # ┌─┐┌───┐ ┌─┐ # q_0: |0>─────────────────■──────────────────┤M├┤ H ├──■─────┤M├ # ┌───┐ │ └╥┘└───┘┌─┴─┐┌─┐└╥┘ @@ -445,23 +440,20 @@ def test_congestion(self): # ║ # c_3: 0 ═══════════════════════════════╩═══════════════════════ # - # ┌───┐ ┌───┐ ┌─┐ - # q_0: |0>───────X──┤ H ├──────────────────────┤ X ├───┤M├ - # │ └───┘┌─┐ ┌───┐ └─┬─┘┌─┐└╥┘ - # q_1: |0>──■────X────■──┤M├──────X─┤ X ├─X──────■──┤M├─╫─ - # ┌─┴─┐┌───┐ │ └╥┘ │ └─┬─┘ │ ┌─┐ └╥┘ ║ - # q_2: |0>┤ X ├┤ H ├──┼───╫───────┼───■───┼─┤M├──────╫──╫─ - # └───┘└───┘┌─┴─┐ ║ ┌───┐ │ ┌───┐ │ └╥┘ ┌─┐ ║ ║ - # q_3: |0>──────────┤ X ├─╫─┤ H ├─X─┤ H ├─X──╫──┤M├──╫──╫─ - # └───┘ ║ └───┘ └───┘ ║ └╥┘ ║ ║ - # c_0: 0 ════════════════╩══════════════════╬═══╬═══╩══╬═ - # ║ ║ ║ - # c_1: 0 ═══════════════════════════════════╬═══╬══════╩═ - # ║ ║ - # c_2: 0 ═══════════════════════════════════╩═══╬════════ - # ║ - # c_3: 0 ═══════════════════════════════════════╩════════ + # Expected output (with seed 999): + # ┌───┐ ┌─┐ + # q_0: ───────X──┤ H ├─────────────────X──────┤M├────── + # │ └───┘ ┌─┐ 
┌───┐ │ ┌───┐└╥┘ ┌─┐ + # q_1: ──■────X────■───────┤M├─X─┤ X ├─X─┤ X ├─╫────┤M├ + # ┌─┴─┐┌───┐ │ └╥┘ │ └─┬─┘┌─┐└─┬─┘ ║ └╥┘ + # q_2: ┤ X ├┤ H ├──┼────────╫──┼───■──┤M├──┼───╫─────╫─ + # └───┘└───┘┌─┴─┐┌───┐ ║ │ ┌───┐└╥┘ │ ║ ┌─┐ ║ + # q_3: ──────────┤ X ├┤ H ├─╫──X─┤ H ├─╫───■───╫─┤M├─╫─ + # └───┘└───┘ ║ └───┘ ║ ║ └╥┘ ║ + # c: 4/═════════════════════╩══════════╩═══════╩══╩══╩═ + # 0 2 3 0 1 # + # Target coupling graph: # 2 # | # 0 - 1 - 3 @@ -472,22 +464,21 @@ def test_congestion(self): expected.swap(qr[0], qr[1]) expected.h(qr[0]) expected.cx(qr[1], qr[3]) - expected.measure(qr[1], cr[0]) expected.h(qr[3]) + expected.measure(qr[1], cr[0]) expected.swap(qr[1], qr[3]) expected.cx(qr[2], qr[1]) expected.h(qr[3]) - expected.swap(qr[1], qr[3]) + expected.swap(qr[0], qr[1]) expected.measure(qr[2], cr[2]) - expected.measure(qr[3], cr[3]) - expected.cx(qr[1], qr[0]) - expected.measure(qr[1], cr[0]) - expected.measure(qr[0], cr[1]) + expected.cx(qr[3], qr[1]) + expected.measure(qr[0], cr[3]) + expected.measure(qr[3], cr[0]) + expected.measure(qr[1], cr[1]) expected_dag = circuit_to_dag(expected) pass_ = StochasticSwap(coupling, 20, 999) after = pass_.run(dag) - self.assertEqual(expected_dag, after) def test_only_output_cx_and_swaps_in_coupling_map(self): diff --git a/tools/install_rust.sh b/tools/install_rust.sh new file mode 100755 index 000000000000..d86416207240 --- /dev/null +++ b/tools/install_rust.sh @@ -0,0 +1,6 @@ +#!/bin/sh +if [ ! -d rust-installer ]; then + mkdir rust-installer + wget https://sh.rustup.rs -O rust-installer/rustup.sh + sh rust-installer/rustup.sh -y +fi diff --git a/tools/verify_parallel_map.py b/tools/verify_parallel_map.py new file mode 100755 index 000000000000..87fc8830abf7 --- /dev/null +++ b/tools/verify_parallel_map.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# This code is part of Qiskit. +# +# (C) Copyright IBM 2022. +# +# This code is licensed under the Apache License, Version 2.0. 
You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +# pylint: disable=wrong-import-position + +"""Test script to verify parallel dispatch via parallel_map() works as expected.""" + + +import math +import os + + +ORIG_ENV_VAR = os.getenv("QISKIT_PARALLEL", None) +if ORIG_ENV_VAR is not None: + print("Removing QISKIT_PARALLEL env var to verify defaults") + del os.environ["QISKIT_PARALLEL"] + + +from qiskit.compiler import transpile +from qiskit.circuit import QuantumCircuit, QuantumRegister, ClassicalRegister +from qiskit.test.mock import FakeRueschlikon + + +def run_test(): + """Run tests.""" + backend = FakeRueschlikon() + qr = QuantumRegister(16) + cr = ClassicalRegister(16) + qc = QuantumCircuit(qr, cr) + qc.h(qr[0]) + for k in range(1, 15): + qc.cx(qr[0], qr[k]) + qc.measure(qr, cr) + qlist = [qc for k in range(15)] + for opt_level in [0, 1, 2, 3]: + tqc = transpile( + qlist, backend=backend, optimization_level=opt_level, seed_transpiler=424242 + ) + result = backend.run(tqc, seed_simulator=4242424242, shots=1000).result() + counts = result.get_counts() + for count in counts: + assert math.isclose(count["0000000000000000"], 500, rel_tol=0.1) + assert math.isclose(count["0111111111111111"], 500, rel_tol=0.1) + + +if __name__ == "__main__": + run_test() + if ORIG_ENV_VAR is not None: + print(f"Restoring QISKIT_PARALLEL env var to {ORIG_ENV_VAR}") + os.environ["QISKIT_PARALLEL"] = ORIG_ENV_VAR diff --git a/tox.ini b/tox.ini index 0ca05f10807a..219bc954d5f0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,7 @@ [tox] -minversion = 2.1 +minversion = 3.3.0 envlist = py37, py38, py39, py310, lint-incr -skipsdist = True +isolated_build = true [testenv] usedevelop = True 
@@ -13,10 +13,12 @@ setenv = ARGS="-V" QISKIT_SUPRESS_PACKAGING_WARNINGS=Y QISKIT_TEST_CAPTURE_STREAMS=1 + QISKIT_PARALLEL=FALSE deps = -r{toxinidir}/requirements.txt -r{toxinidir}/requirements-dev.txt commands = stestr run {posargs} + {toxinidir}/tools/verify_parallel_map.py [testenv:lint] envdir = .tox/lint @@ -58,6 +60,7 @@ deps = -r{toxinidir}/requirements.txt qiskit-aer commands = stestr run {posargs} + coverage3 run --source qiskit --parallel-mode {toxinidir}/tools/verify_parallel_map.py coverage3 combine coverage3 report