Merge pull request #9 from AlecThomson/dev
Dev merge
AlecThomson authored Apr 27, 2023
2 parents ec9df92 + 8f4f09f commit 6427e86
Showing 39 changed files with 1,086 additions and 565 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -141,3 +141,6 @@ conda-lock.yml
poetry.lock
setup.py.bak
*.bin

# Test data
test/data/testdb/*
3 changes: 3 additions & 0 deletions .gitmodules
@@ -7,3 +7,6 @@
[submodule "PolSpectra"]
path = PolSpectra
url = https://github.com/AlecThomson/PolSpectra
[submodule "RMTable"]
path = RMTable
url = https://github.com/CIRADA-Tools/RMTable.git
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -11,19 +11,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- This changelog!
- `scripts/tar_cubelets.py` and CLI hook
- `makecat.py`: Added `flag_blended_components` to identify and flag blended components; adds `is_blended_flag`, `N_blended`, and `blend_ratio` to the catalogue (see the sketch after this list)
- Proper logging module

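The blended-component logic can be pictured as a neighbour search over the component catalogue. Below is a minimal sketch, not the actual `makecat.py` implementation: only the three output column names come from the entry above, while the matching radius, the `ra`/`dec`/`total_flux` input columns, and the definition of `blend_ratio` as a flux ratio against neighbouring components are illustrative assumptions.

```python
# Hedged sketch of blended-component flagging. Only the output column names
# (is_blended_flag, N_blended, blend_ratio) are taken from the changelog;
# the matching radius, input column names, and ratio definition are guesses.
import astropy.units as u
import numpy as np
from astropy.coordinates import SkyCoord, search_around_sky
from astropy.table import Table


def flag_blended_components(cat: Table, radius: u.Quantity = 25 * u.arcsec) -> Table:
    """Flag components that have at least one other component within `radius`."""
    coords = SkyCoord(cat["ra"], cat["dec"], unit="deg")
    # All pairs of components closer than `radius`, including self-matches
    idx1, idx2, _, _ = search_around_sky(coords, coords, radius)
    pairs = idx1 != idx2  # drop each component's match with itself

    n_blended = np.bincount(idx1[pairs], minlength=len(cat))
    flux = np.asarray(cat["total_flux"], dtype=float)  # hypothetical column
    neighbour_flux = np.zeros(len(cat))
    np.add.at(neighbour_flux, idx1[pairs], flux[idx2[pairs]])

    cat["is_blended_flag"] = n_blended > 0
    cat["N_blended"] = n_blended
    with np.errstate(divide="ignore", invalid="ignore"):
        cat["blend_ratio"] = np.where(n_blended > 0, flux / neighbour_flux, np.nan)
    return cat
```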
### Fixed

- `columns_possum.py`: Add new Stokes I fit flags and UCDs (plus others)
- `columns_possum.py`: Add new Stokes I fit flags and UCDs (plus others) and descriptions
- `scripts/casda_prepare.py`: Refactor to make consolidated products and make CASDA happy
- `scripts/fix_dr1_cat.py`: Added extra columns that needed to be fixed in DR1, e.g. `sbid`, `start_time`
- Typing in various places

### Changed

- Renamed `scripts/helloworld.py` to `scripts/hellow_mpi_world.py`
- `makecat.py`: Added `compute_local_rm_flag` function (see the sketch after this list)
- `rmsynth_oncuts.py`: Added new Stokes I fit flags
- `utils.py`: Refactored Stokes I fitting to use dicts to track values
- Use local installs of customised packages

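The changelog names `compute_local_rm_flag` but not its algorithm. As a loosely hedged sketch, one plausible reading is an outlier test against the RMs of each source's nearest neighbours; the neighbour count, statistic, threshold, and column names below are all assumptions rather than the function's actual contents.

```python
# Hedged sketch only: one plausible local-RM outlier test. Everything here
# except the function name is an assumption about what makecat.py does.
import numpy as np
from astropy.coordinates import SkyCoord
from astropy.stats import mad_std
from astropy.table import Table


def compute_local_rm_flag(cat: Table, n_neighbours: int = 10, sigma_cut: float = 3.0) -> Table:
    """Flag RMs that are outliers relative to their nearest neighbours."""
    coords = SkyCoord(cat["ra"], cat["dec"], unit="deg")
    rm = np.asarray(cat["rm"], dtype=float)  # hypothetical column name
    flags = np.zeros(len(cat), dtype=bool)
    for i in range(len(cat)):  # O(N^2); fine for a sketch, slow in production
        seps = coords[i].separation(coords)
        neighbours = np.argsort(seps)[1 : n_neighbours + 1]  # index 0 is self
        local_med = np.median(rm[neighbours])
        local_scatter = mad_std(rm[neighbours])
        if local_scatter > 0 and abs(rm[i] - local_med) > sigma_cut * local_scatter:
            flags[i] = True
    cat["local_rm_flag"] = flags
    return cat
```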
### Removed

4 changes: 2 additions & 2 deletions README.md
@@ -6,15 +6,15 @@ Scripts for processing polarized RACS data products.

## Documentation

The documentation is available at [spice-racs.readthedocs.io](https://spice-racs.readthedocs.io).
The documentation is available at [spice-racs.readthedocs.io](https://spiceracs.readthedocs.io).

## Acknowledging

If you use SPICE-RACS in your research, please cite [Thomson et al. (in prep)](https://ui.adsabs.harvard.edu/abs/).

### 3rd party software

Please also consider acknowledging the following software packages outlined in [docs](https://spice-racs.readthedocs.io/acknowledging.html).
Please also consider acknowledging the following software packages outlined in [docs](https://spiceracs.readthedocs.io/acknowledging.html).

## Contributing

1 change: 1 addition & 0 deletions RMTable
Submodule RMTable added at b80155
16 changes: 5 additions & 11 deletions pyproject.toml
@@ -28,7 +28,7 @@ include = [

[tool.poetry.dependencies]
python = "^3.8"
rm-tools = {git = "https://github.com/AlecThomson/RM-Tools.git@spiceracs_dev"}
rm-tools = {path = "./RM-Tools"}
astropy = "^5"
bilby = "*"
casatasks = "*"
@@ -63,15 +63,15 @@ vorbin = "*"
graphviz = "*"
bokeh = "*"
prefect = "<2"
RMTable = { git = "https://github.com/CIRADA-Tools/RMTable" }
PolSpectra = { git = "https://github.com/AlecThomson/PolSpectra.git@spiceracs"}
RMTable = { path = "./RMTable" }
PolSpectra = { path = "./PolSpectra"}
setuptools = "*"

[tool.poetry.dev-dependencies]
black = "^22.10"
black = "^23"
flake8 = "^5"
isort = "^5"
mypy = "^0.991"
mypy = "^1"

[tool.poetry.extras]
docs = [
@@ -83,12 +83,6 @@ docs = [
"numpydoc",
]

dev = [
"black>=22.10",
"flake8>=5",
"isort>=5",
]

[build-system]
requires = ["poetry-core>=1.2"]
build-backend = "poetry.core.masonry.api"
78 changes: 33 additions & 45 deletions scripts/casda_prepare.py
@@ -2,7 +2,7 @@
"""Prepare files for CASDA upload"""
import argparse
import hashlib
import logging as log
import logging
import os
import pickle
import subprocess as sp
@@ -46,6 +46,7 @@
from spectral_cube.cube_utils import convert_bunit
from tqdm.auto import tqdm, trange

from spiceracs.logger import logger
from spiceracs.makecat import write_votable
from spiceracs.utils import chunk_dask, tqdm_dask, try_mkdir, try_symlink, zip_equal

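This file's changes are mostly the switch from ad-hoc `logging.basicConfig` calls to a shared `logger` imported from `spiceracs.logger`. A minimal sketch of what that module presumably provides is below; the format strings are lifted from the `basicConfig` calls this PR deletes from `cli()`, but the handler setup and default level are assumptions rather than the actual module contents.

```python
# spiceracs/logger.py -- hedged sketch of the shared logging module.
# Format/datefmt come from the removed basicConfig calls; the rest is assumed.
import logging

formatter = logging.Formatter(
    fmt="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger("spiceracs")
logger.addHandler(handler)
logger.setLevel(logging.WARNING)  # cli() raises this to INFO/DEBUG via flags
```

Centralising the configuration this way means every script shares one handler and format, and the verbosity flags reduce to the `logger.setLevel` calls visible near the bottom of this diff.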
@@ -82,7 +83,7 @@ def make_thumbnail(cube_f: str, cube_dir: str):
ax.set_ylabel("Dec")
fig.colorbar(im, ax=ax, label=f"{convert_bunit(head['BUNIT']):latex_inline}")
outf = os.path.join(cube_dir, os.path.basename(cube_f).replace(".fits", ".png"))
log.info(f"Saving thumbnail to {outf}")
logger.info(f"Saving thumbnail to {outf}")
fig.savefig(outf, dpi=300)
plt.close(fig)

@@ -97,9 +98,9 @@ def find_spectra(data_dir: str = ".") -> list:
list: List of spectra in ascii format
"""
cut_dir = os.path.join(data_dir, "cutouts")
log.info(f"Globbing for spectra in {cut_dir}")
logger.info(f"Globbing for spectra in {cut_dir}")
spectra = glob(os.path.join(os.path.join(cut_dir, "*"), "*[0-9].dat"))
log.info(f"Found {len(spectra)} spectra (in frequency space)")
logger.info(f"Found {len(spectra)} spectra (in frequency space)")
return spectra


@@ -255,7 +256,7 @@ def convert_spectra(
outf = os.path.join(
spec_dir, os.path.basename(spectrum).replace(".dat", f"_polspec.fits")
)
log.info(f"Saving to {outf}")
logger.info(f"Saving to {outf}")
spectrum_table.write_FITS(outf, overwrite=True)
# Add object to header
# Hard code the pixel size for now
@@ -339,7 +340,7 @@ def update_cube(cube: str, cube_dir: str) -> None:
"image.restored.i.", f"{imtype}.{''.join(stokes)}."
),
).replace("RACS_test4_1.05_", "RACS_")
log.info(f"Writing {outf} cubelet")
logger.info(f"Writing {outf} cubelet")
fits.writeto(outf, data, header, overwrite=True)

# Move cube to cubelets directory
@@ -356,11 +357,11 @@ def find_cubes(data_dir: str = ".") -> list:
list: List of cubelets
"""
cut_dir = os.path.join(data_dir, "cutouts")
log.info(f"Globbing for cubes in {cut_dir}")
logger.info(f"Globbing for cubes in {cut_dir}")
cubes = glob(
os.path.join(os.path.join(cut_dir, "*"), "*.image.restored.i.*.linmos.fits")
)
log.info(f"Found {len(cubes)} Stokes I image cubes")
logger.info(f"Found {len(cubes)} Stokes I image cubes")
return cubes


@@ -374,11 +375,11 @@ def init_polspec(
outdir = casda_dir
polspec_0 = polspectra.from_FITS(spectrum_table_0)
out_fits = os.path.join(os.path.abspath(outdir), "spice_racs_dr1_polspec.fits")
log.info(f"Saving to {out_fits}")
logger.info(f"Saving to {out_fits}")
polspec_0.write_FITS(out_fits, overwrite=True)

out_hdf = os.path.join(os.path.abspath(outdir), "spice_racs_dr1_polspec.hdf5")
# log.info(f"Saving to {out_hdf}")
# logger.info(f"Saving to {out_hdf}")
# polspec_0.write_HDF5(out_hdf, overwrite=True, compress=True)

return out_fits, out_hdf
@@ -434,10 +435,10 @@ def convert_pdf(pdf_file: str, plots_dir: str, spec_dir: str) -> None:
"""
png_file = pdf_file.replace(".pdf", "")
png_file = os.path.join(plots_dir, os.path.basename(png_file))
log.info(f"Converting {pdf_file} to {png_file}")
logger.info(f"Converting {pdf_file} to {png_file}")

cmd = f"pdftoppm {pdf_file} {png_file} -png"
log.info(cmd)
logger.info(cmd)
sp.run(cmd.split(), check=True)
# Grr, pdftoppm doesn't preserve the file name
actual_name = f"{png_file}-1.png"
@@ -478,9 +479,9 @@ def find_plots(data_dir: str = ".") -> list:
list: List of plots
"""
cut_dir = os.path.join(data_dir, "cutouts")
log.info(f"Globbing for plots in {cut_dir}")
logger.info(f"Globbing for plots in {cut_dir}")
plots = glob(os.path.join(os.path.join(cut_dir, "RACS_*"), "*.pdf"))
log.info(f"Found {len(plots)} plots")
logger.info(f"Found {len(plots)} plots")
return plots


@@ -500,9 +501,9 @@ def main(
# Re-register astropy units
for unit in (u.deg, u.hour, u.hourangle, u.Jy, u.arcsec, u.arcmin, u.beam):
get_current_unit_registry().add_enabled_units([unit])
log.info("Starting")
log.info(f"Dask client: {client}")
log.info(f"Reading {polcatf}")
logger.info("Starting")
logger.info(f"Dask client: {client}")
logger.info(f"Reading {polcatf}")

polcat = Table.read(polcatf)
df = polcat.to_pandas()
@@ -512,7 +513,7 @@

test = prep_type == "test"

log.info(f"Preparing data for {prep_type} CASDA upload")
logger.info(f"Preparing data for {prep_type} CASDA upload")

if prep_type == "full":
pass
@@ -545,7 +546,7 @@ def main(

cube_outputs = []
if do_update_cubes:
log.info("Updating cubelets")
logger.info("Updating cubelets")
cube_dir = os.path.join(casda_dir, "cubelets")
try_mkdir(cube_dir)

@@ -556,14 +557,14 @@ def main(
set(polcat["source_id"])
), "Number of cubes does not match number of sources"
except AssertionError:
log.warning(
logger.warning(
f"Found {len(cubes)} cubes, expected {len(set(polcat['source_id']))}"
)
if len(cubes) < len(set(polcat["source_id"])):
log.critical("Some cubes are missing on disk!")
logger.critical("Some cubes are missing on disk!")
raise
else:
log.warning("Need to exclude some cubes")
logger.warning("Need to exclude some cubes")
source_ids = []
for i, cube in enumerate(cubes):
basename = os.path.basename(cube)
@@ -572,13 +573,13 @@ def main(
source_ids.append(source_id)
in_idx = np.isin(source_ids, polcat["source_id"])
cubes = list(np.array(cubes)[in_idx])
log.warning(
logger.warning(
f"I had to exclude {np.sum(~in_idx)} sources that were not in the catalogue"
)
# Write missing source IDs to disk
rem_ids = list(set(np.array(source_ids)[~in_idx]))
outf = os.path.join(casda_dir, "excluded_sources.txt")
log.info(f"Writing excluded source IDs to {outf}")
logger.info(f"Writing excluded source IDs to {outf}")
with open(outf, "w") as f:
for rid in rem_ids:
f.write(f"{rid}\n")
@@ -611,7 +612,7 @@ def my_sorter(x, lookup=lookup, pbar=pbar):

spectra_outputs = []
if do_convert_spectra:
log.info("Converting spectra")
logger.info("Converting spectra")
spec_dir = os.path.join(casda_dir, "spectra")
try_mkdir(spec_dir)
spectra = find_spectra(data_dir=data_dir)
@@ -672,7 +673,7 @@ def my_sorter(x, lookup=lookup, pbar=pbar):

plot_outputs = []
if do_convert_plots:
log.info("Converting plots")
logger.info("Converting plots")
plots_dir = os.path.join(casda_dir, "plots")
try_mkdir(plots_dir)
spec_dir = os.path.join(casda_dir, "spectra")
@@ -717,7 +718,7 @@ def my_sorter(x, lookup=lookup, pbar=pbar):
for name, outputs in zip(
("cubes", "spectra", "plots"), (cube_outputs, spectra_outputs, plot_outputs)
):
log.info(f"Starting work on {len(outputs)} {name}")
logger.info(f"Starting work on {len(outputs)} {name}")

futures = chunk_dask(
outputs=outputs,
@@ -732,7 +733,7 @@ def my_sorter(x, lookup=lookup, pbar=pbar):
spectrum_tables = client.gather(client.compute(futures))
# Add all spectrum_tables to a tar ball
tarball = os.path.join(casda_dir, f"spice_racs_dr1_polspec_{prep_type}.tar")
log.info(f"Adding spectra to tarball {tarball}")
logger.info(f"Adding spectra to tarball {tarball}")
with tarfile.open(tarball, "w") as tar:
for spectrum_table in tqdm(
spectrum_tables, "Adding spectra to tarball"
@@ -742,7 +743,7 @@ def my_sorter(x, lookup=lookup, pbar=pbar):
if do_convert_spectra:
os.remove(fname_polcat_hash)

log.info("Done")
logger.info("Done")


def cli():
@@ -816,22 +817,9 @@ def cli():
)
args = parser.parse_args()
if args.verbose:
log.basicConfig(
level=log.INFO,
format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger.setLevel(logging.INFO)
elif args.debug:
log.basicConfig(
level=log.DEBUG,
format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
else:
log.basicConfig(
format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger.setLevel(logging.DEBUG)

if args.mpi:
initialize(
@@ -850,7 +838,7 @@ def cli():
with Client(
cluster,
) as client:
log.debug(f"{client=}")
logger.debug(f"{client=}")
main(
polcatf=args.polcat,
client=client,