Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ability to do vector-vector loop #75

Merged
merged 48 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
2fe40e0
feat: add ability to do vector-vector loop
steven-murray Dec 4, 2023
eab2dff
test: fix new cublas test
steven-murray Dec 4, 2023
0ae70a3
feat: completely overhauled algorithm that is easier to follow
steven-murray Dec 13, 2023
1dfb360
fix: profiling cli
steven-murray Dec 13, 2023
92ee8f7
fix: profiling cli
steven-murray Dec 13, 2023
3a18bca
cli: refactor get_label
steven-murray Dec 14, 2023
64ffc3e
perf: re-order feed/ax axes in interpolated beams
steven-murray Dec 14, 2023
6f9ffda
maint: conditional import of gpu subpackage
steven-murray Dec 15, 2023
ae7883e
maint: conditional import of gpu subpackage in cli
steven-murray Dec 15, 2023
68f6518
fix: cli label
steven-murray Dec 15, 2023
d06be60
fix: cli label
steven-murray Dec 15, 2023
9895473
cli: add option to change naz/nza for profiling
steven-murray Dec 15, 2023
3638c26
cli: add option to change naz/nza for profiling
steven-murray Dec 15, 2023
bdb5fe6
fix: summing for gpu with nchunks>1
steven-murray Dec 15, 2023
18cbf28
fix: summing for gpu with nchunks>1
steven-murray Dec 15, 2023
136feb0
fix: use more than 0.5 GB GPU memory
steven-murray Dec 15, 2023
f1a45e8
fix: memory estimate for ant-vis
steven-murray Dec 18, 2023
af7d3e2
feat: add --nchunks param to cli
steven-murray Dec 18, 2023
ecbeff8
perf: reduce memory usage in getz
steven-murray Dec 18, 2023
74de314
ux: extra debugging logging
steven-murray Dec 18, 2023
662fd73
perf: use initial memory cache
steven-murray Dec 18, 2023
317f205
ux: add source buffer to CLI
steven-murray Dec 18, 2023
c46b658
perf: don't require extra copy of beams for nbeams=1
steven-murray Dec 18, 2023
8aa3071
perf: use beam_idx=None in CLI
steven-murray Dec 18, 2023
c779593
fix: using beam_idx=None
steven-murray Dec 18, 2023
0d5d189
fix: properly slice the fluxes as well as crds
steven-murray Dec 21, 2023
a307e39
merge main
steven-murray Sep 4, 2024
f808a45
feat: add new ways to rotate coords
steven-murray Sep 5, 2024
3bdd874
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 5, 2024
918c9d6
upgrades to the cli profile
steven-murray Sep 5, 2024
7d0f884
Merge branch 'vector-vector' of github.com:hera-team/matvis into vect…
steven-murray Sep 5, 2024
5413164
use map_coordinates in pyuvdata
steven-murray Sep 5, 2024
65ce53e
fi: error in list comprehension
steven-murray Sep 5, 2024
6454b57
perf: add update_bcrs_every parameter
steven-murray Sep 6, 2024
35d8daa
feat: allow order>1 for gpu beam interp
steven-murray Sep 10, 2024
a2fc980
test: added and streamlined many tests
steven-murray Sep 11, 2024
cb9d891
ci: don't force gpu tests
steven-murray Sep 11, 2024
65ba661
ci: gpu tests env update
steven-murray Sep 11, 2024
2b28dcb
test: don't run gpu tests if it's not available
steven-murray Sep 11, 2024
a489c0e
test: don't run gpu tests if it's not available
steven-murray Sep 11, 2024
48e9802
ci: don't run plot tests
steven-murray Sep 11, 2024
59531a7
ci: try better self-hosted mamba env
steven-murray Sep 11, 2024
b53968e
ci: try better self-hosted mamba env
steven-murray Sep 11, 2024
182eaa6
ci: try better self-hosted mamba env
steven-murray Sep 11, 2024
92dd374
ci: try better self-hosted mamba env
steven-murray Sep 11, 2024
719b6d3
ci: use token for codecov
steven-murray Sep 12, 2024
f781630
ci: use token for codecov
steven-murray Sep 12, 2024
bef352a
docs: update notebook to use new coordinates
steven-murray Sep 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions src/matvis/_cublas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np
import warnings

try:
with warnings.catch_warnings():
warnings.simplefilter("ignore")

from pycuda import autoinit, gpuarray
from skcuda.cublas import (
_libcublas,
cublasCgemm,
cublasCreate,
cublasDgemm,
cublasSgemm,
cublasZgemm,
)

HAVE_PYCUDA = True
except Exception:
HAVE_PYCUDA = False

if HAVE_PYCUDA:
_DEFAULT_CUBLAS_HANDLE = cublasCreate() # handle for managing cublas

DOT_MAP = {
np.dtype(np.complex64): _libcublas.cublasCdotc_v2,
np.dtype(np.complex128): _libcublas.cublasZdotc_v2,
np.dtype(np.float32): _libcublas.cublasSdot_v2,
np.dtype(np.float64): _libcublas.cublasDdot_v2,
}

def _cublas_dotc_inplace(handle, n, x, incx, y, incy, result):
"""In-place Zdotc that doesn't transfer data to the host."""
fnc = DOT_MAP[x.dtype.type]
fnc(handle, n, int(x), incx, int(y), incy, int(result))

def dotc(
a: gpuarray.GPUArray,
b: gpuarray.GPUArray,
out: gpuarray.GPUArray | None = None,
h=_DEFAULT_CUBLAS_HANDLE,
) -> gpuarray.GPUArray:
"""Dot-product of two vectors on the GPU.

Note that if the arrays are complex, the second one is conjugated.
"""
if out is None:
out = gpuarray.empty((1,), a.dtype)

return _cublas_dotc_inplace(h, a.size, a.gpudata, 1, b.gpudata, 1, out.gpudata)

GEMM_MAP = {
np.dtype(np.complex64): cublasCgemm,
np.dtype(np.complex128): cublasZgemm,
np.dtype(np.float32): cublasSgemm,
np.dtype(np.float64): cublasDgemm,
}

def gemm(
a: gpuarray.GPUArray,
b: gpuarray.GPUArray,
out: gpuarray.GPUArray | None = None,
h=_DEFAULT_CUBLAS_HANDLE,
trans_a="n",
trans_b="n",
) -> gpuarray.GPUArray:
"""Matrix-matrix multiplication on the GPU."""
fnc = GEMM_MAP[a.dtype.type]

if out is None:
out = gpuarray.empty((a.shape[0], b.shape[1]), a.dtype)

fnc( # compute crdtop = dot(eq2top,crd_eq)
h,
trans_a,
trans_b,
a.shape[0],
a.shape[1],
b.shape[1],
1.0,
a.gpudata, # crd_eq_gpu.gpudata,
a.shape[0],
b.gpudata, # eq2top_gpu.gpudata,
b.shape[0],
0.0,
out.gpudata,
a.shape[0],
)
return out

def zz(
z: gpuarray.GPUArray,
out: gpuarray.GPUArray | None = None,
h=_DEFAULT_CUBLAS_HANDLE,
) -> gpuarray.GPUArray:
"""Compute the matrix product of a matrix with itself (conjugated)."""
return gemm(z, z, out, trans_a="h", h=h)
59 changes: 47 additions & 12 deletions src/matvis/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,13 @@ def simulate(
crd_eq: np.ndarray,
I_sky: np.ndarray,
beam_list: Sequence[UVBeam | Callable] | None,
antpairs: np.ndarray | list[tuple[int, int]] | None = None,
precision: int = 1,
polarized: bool = False,
beam_idx: np.ndarray | None = None,
beam_spline_opts: dict | None = None,
max_progress_reports: int = 100,
use_loop_over_antpairs: bool = False,
):
"""
Calculate visibility from an input intensity map and beam model.
Expand Down Expand Up @@ -246,6 +248,10 @@ def simulate(
``beam_list`` should be provided.Note that if `polarized` is True,
these beams must be efield beams, and conversely if `polarized` is False they
must be power beams with a single polarization (either XX or YY).
antpairs : array_like, optional
Either a 2D array, shape ``(Npairs, 2)``, or list of 2-tuples of ints, with
the list of antenna-pairs to return as visibilities (all feed-pairs are always
calculated). If None, all feed-pairs are returned.
precision : int, optional
Which precision level to use for floats and complex numbers.
Allowed values:
Expand All @@ -264,13 +270,21 @@ def simulate(
max_progress_reports : int, optional
Maximum number of progress reports to print to the screen (if logging level
allows). Default is 100.
use_loop_over_antpairs : bool, optional
Whether to calculate visibilities for each antpair in antpairs as a vector
dot-product instead of using a full matrix-matrix multiplication for all
possible pairs. Default is False. Setting to True can be faster for large
arrays where `antpairs` is small (possibly from high redundancy). You should
run a performance test before using this.

Returns
-------
vis : array_like
Simulated visibilities. If `polarized = True`, the output will have
shape (NTIMES, NFEED, NFEED, NANTS, NANTS), otherwise it will have
shape (NTIMES, NANTS, NANTS).
shape (NTIMES, NANTS, NANTS). If ``antpairs`` is provided, the shape will have
``(NANTS, NANTS)`` replaced by ``(NPAIRS,)``.

"""
if not tm.is_tracing() and logger.isEnabledFor(logging.INFO):
tm.start()
Expand All @@ -296,12 +310,17 @@ def simulate(
# negative sky. Factor of 0.5 accounts for splitting Stokes I between
# polarization channels
Isqrt = np.sqrt(0.5 * I_sky).astype(real_dtype)
antpos = antpos.astype(real_dtype)

ang_freq = real_dtype(2.0 * np.pi * freq)

antpos_u = antpos.astype(real_dtype) * ang_freq / c.value

# Zero arrays: beam pattern, visibilities, delays, complex voltages
vis = np.full((ntimes, nfeed * nant, nfeed * nant), 0.0, dtype=complex_dtype)
if antpairs is None:
vis = np.full((ntimes, nfeed * nant, nfeed * nant), 0.0, dtype=complex_dtype)
else:
vis = np.full((ntimes, nfeed, nfeed, len(antpairs)), 0.0, dtype=complex_dtype)

logger.info(f"Visibility Array takes {vis.nbytes/1024**2:.1f} MB")

crd_eq = crd_eq.astype(real_dtype)
Expand Down Expand Up @@ -343,8 +362,8 @@ def simulate(

_log_array("beam", A_s)

# Calculate delays, where tau = (b * s) / c
tau = np.dot(antpos / c.value, crd_top[:, above_horizon])
# Calculate delays, where tau = 2pi*nu*(b * s) / c
tau = np.dot(antpos_u, crd_top[:, above_horizon])
_log_array("tau", tau)

v = _get_antenna_vis(
Expand All @@ -353,32 +372,48 @@ def simulate(
_log_array("vant", v)

# Compute visibilities using product of complex voltages (upper triangle).
vis[t] = v.conj().dot(v.T)
if antpairs is None:
vis[t] = v.conj().dot(v.T)
else:
if use_loop_over_antpairs:
v = v.reshape((nfeed, nant, -1))

for i, (ai, aj) in enumerate(antpairs):
vis[t, :, :, i] = v[:, ai].conj().dot(v[:, aj].T)

else:
_x = (v.conj().dot(v.T)).reshape((nfeed, nant, nfeed, nant))
for i, (ai, aj) in enumerate(antpairs):
vis[t, :, :, i] = _x[:, ai, :, aj]

_log_array("vis", vis[t])

if not (t % report_chunk or t == ntimes - 1):
plast, mlast = _log_progress(tstart, plast, t + 1, ntimes, pr, mlast)
highest_peak = _memtrace(highest_peak)

vis.shape = (ntimes, nfeed, nant, nfeed, nant)
if antpairs is None:
vis.shape = (ntimes, nfeed, nant, nfeed, nant)

# Return visibilities with or without multiple polarization channels
return vis.transpose((0, 1, 3, 2, 4)) if polarized else vis[:, 0, :, 0, :]
# Return visibilities with or without multiple polarization channels
return vis.transpose((0, 1, 3, 2, 4)) if polarized else vis[:, 0, :, 0, :]
else:
return vis if polarized else vis[:, 0, 0]


def _get_antenna_vis(
A_s, ang_freq, tau, Isqrt, beam_idx, nfeed, nant, nax, nsrcs_up
A_s, tau, Isqrt, beam_idx, nfeed, nant, nax, nsrcs_up
) -> np.ndarray:
"""Compute the antenna-wise visibility integrand."""
# Component of complex phase factor for one antenna
# (actually, b = (antpos1 - antpos2) * crd_top / c; need dot product
# below to build full phase factor for a given baseline)
v = np.exp(1.0j * (ang_freq * tau)) * Isqrt
v = np.exp(1.0j * tau) * Isqrt

# A_s has shape (Nfeed, Nbeams, Nax, Nsources)
# v has shape (Nants, Nsources) and is sqrt(I)*exp(1j tau*nu)
# Here we expand A_s to all ants (from its beams), then broadcast to v, so we
# end up with shape (Nax, Nfeed, Nants, Nsources)
# end up with shape (Nfeed, Nant, Nax, Nsources)
v = A_s[:, beam_idx] * v[np.newaxis, :, np.newaxis, :] # ^ but Nbeam -> Nant
return v.reshape((nfeed * nant, nax * nsrcs_up)) # reform into matrix

Expand Down
Loading
Loading