Skip to content

Commit

Permalink
Merge pull request #2 from calvin-sykes/tomczak13-gsmf
Browse files Browse the repository at this point in the history
Add data and converter script for Tomczak et. al. (2013) GSMF
  • Loading branch information
JBorrow authored Jan 17, 2020
2 parents fe4aac5 + 686ecf5 commit b12cebe
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 0 deletions.
168 changes: 168 additions & 0 deletions data/GalaxyStellarMassFunction/conversion/convertTomczak2013.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
from velociraptor.observations.objects import ObservationalData

import unyt
import numpy as np
import os
import re
import sys


ORIGINAL_H = 0.70


def handle_bad_values(parts):
"""
Convert "missing data" values to NaNs and fix the "errors" on these values getting
misinterpreted as additional data points.
parts: list of strings obtained by splitting a line from the datafile on " "
"""
n_parts = len(parts)
to_remove = []
for i in range(n_parts):
if parts[i] == "-99":
parts[i] = "NaN 0 0"
to_remove.extend([i + 1, i + 2])
elif parts[i] == "":
to_remove.append(i)
for idx in reversed(to_remove):
del parts[idx]
return parts


def load_file_and_split_by_z(raw_file_name):
"""
Read the data file and do all the mucking around needed to extract lists of the
redshift and stellar mass bins for which the GSMF is tabulated, along with the
corresponding GSMF values and their errors.
raw_file_name: the file name of the raw data file to extract the GSMF from
"""
with open(raw_file_name, "r") as f:
lines = f.readlines()
# count lines that start with comment (#) or are blank
n_header_lines = sum(re.match("#.+|^\s*$", l) is not None for l in lines)

z_ranges = lines[4]
# Each range of redshifts is separated by two or more spaces
z_ranges = re.split("#?\s{2,}", z_ranges)[1:]
z_bins_arr = np.asarray([float(z_rge.split()[0]) for z_rge in z_ranges])

n_redshift_bins = len(z_bins_arr)
n_stellar_mass_bins = len(lines) - n_header_lines
gsmf_arr = np.zeros((n_redshift_bins, n_stellar_mass_bins, 3))

mass_bins_arr = np.zeros(n_stellar_mass_bins)

for ism, l in enumerate(lines[n_header_lines:]):
# The GSMF values for each redshift bin are separated by two or more spaces
parts = re.split("#?\s{2,}", l)
# The first number on each line is the stellar mass bin
mass_bins_arr[ism] = float(parts[0])

if any(p == "-99" or p == "" for p in parts):
# this indicates "bad value" and the errors will be given as "0"
# but we will incorrectly register them as further data values
# due to the way the file is formatted
# so we need to sort this out first
parts = handle_bad_values(parts)

for iz, part in enumerate(parts[1:]):
# each redshift bin has a 3-tuple of (GSMF, +ve error, -ve error)
phi, errp, errn = map(float, part.split())
gsmf_arr[iz, ism] = phi, errp, errn

return z_bins_arr, mass_bins_arr, gsmf_arr


def process_for_redshift(z, mstar_bins, gsmf_at_z):
"""
Output an HDF5 file containing the GSMF at a given redshift.
z: the redshift to produce the GSMF for. The given value corresponds to the lower
edge of a range in redshift, which has width 0.25 for z < 1.5 and 0.5 for z >= 1.5
mstar_bins: the list of stellar mass bins for which the GSMF is tabulated
gsmf_at_z: the slice of the GSMF array at the chosen redshift
"""

processed = ObservationalData()

comment = (
f"Assuming Chabrier IMF, quoted redshift is lower bound of range. "
"h-corrected for SWIFT using Cosmology: {cosmology.name}."
)
citation = "Tomczak et al (2013)"
bibcode = "2014ApJ...783...85T"
name = "GSMF from ZFOURGE/CANDELS"
plot_as = "points"
redshift = z
h = cosmology.h

M = 10 ** mstar_bins * (h / ORIGINAL_H) ** (-2) * unyt.Solar_Mass
Phi = 10 ** gsmf_at_z[:, 0] * (h / ORIGINAL_H) ** 3 * unyt.Mpc ** (-3)
# y_scatter should be a 1xN or 2xN array describing offsets from
# the median point 'y'
# Errors are log error dz = 1/ln(10) dy/y
# We want dy = y ln(10) dz
Phi_err = (
(10 ** gsmf_at_z[:, 0][:, None] * np.log(10) * gsmf_at_z[:, [2, 1]]).T
* (h / ORIGINAL_H) ** 3
* unyt.Mpc ** (-3)
)

processed.associate_x(
M, scatter=None, comoving=True, description="Galaxy Stellar Mass"
)
processed.associate_y(Phi, scatter=Phi_err, comoving=True, description="Phi (GSMF)")
processed.associate_citation(citation, bibcode)
processed.associate_name(name)
processed.associate_comment(comment)
processed.associate_redshift(redshift)
processed.associate_plot_as(plot_as)
processed.associate_cosmology(cosmology)

return processed


def stringify_z(z):
"""
Eagle-style text formatting of redshift label.
Example: z=1.5 will be printed as z001p500.
z: The redshift to produce a label for
"""
whole = int(z)
frac = int(1000 * (z - whole))
return f"z{whole:03d}p{frac:03d}"


# Exec the master cosmology file passed as first argument
# These lines are _required_ and you are required to use
# the cosmology specified (this is an astropy.cosmology
# instance)
with open(sys.argv[1], "r") as handle:
exec(handle.read())

input_filename = "../raw/Tomczak2013.txt"

output_filename = "Tomczak2013_{}.hdf5"
output_directory = "../"

if not os.path.exists(output_directory):
os.mkdir(output_directory)

# z_bins, mstar_bins are 1-D ndarrays containing the lower edges of the redshift bins,
# and the log(stellar mass) bins respectively
# gsmf is a 3-D ndarray with axes 0 and 1 corresponding to z and stellar mass bins
# Axis 2 ranges from 0..2 and contains log(GSMF), and the +- errors respectively
z_bins, mstar_bins, gsmf = load_file_and_split_by_z(input_filename)

for iz, z in enumerate(z_bins):
processed = process_for_redshift(z, mstar_bins, gsmf[iz])

output_path = f"{output_directory}/{output_filename.format(stringify_z(z))}"

if os.path.exists(output_path):
os.remove(output_path)

processed.write(filename=output_path)
20 changes: 20 additions & 0 deletions data/GalaxyStellarMassFunction/raw/Tomczak2013.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#Observational data for galaxy stellar mass function from Tomczak et al 2013
#Chabrier (2003) IMF used
#Omega_M = 0.3, Omega_lambda=0.7, h=0.7

# 0.5 < z < 0.75 0.75 < z < 1.0 1.0 < z < 1.25 1.25 < z < 1.5 1.5 < z < 2.0 2.0 < z < 2.5 2.5 < z < 3.0
#log(M/M_sun) log(Phi) + - log(Phi) + - log(Phi) + - log(Phi) + - log(Phi) + - log(Phi) + - log(Phi) + -
8.25 -1.53 0.06 0.07 -99 0 0 -99 0 0 -99 0 0 -99 0 0 -99 0 0 -99 0 0
8.50 -1.60 0.05 0.06 -1.70 0.05 0.06 -99 0 0 -99 0 0 -99 0 0 -99 0 0 -99 0 0
8.75 -1.76 0.06 0.06 -1.86 0.05 0.06 -1.99 0.06 0.06 -2.02 0.06 0.07 -99 0 0 -99 0 0 -99 0 0
9.00 -1.86 0.06 0.07 -2.01 0.06 0.06 -2.14 0.06 0.07 -2.14 0.06 0.07 -2.20 0.05 0.06 -99 0 0 -99 0 0
9.25 -2.00 0.06 0.07 -2.10 0.06 0.07 -2.24 0.06 0.07 -2.28 0.06 0.07 -2.31 0.05 0.06 -2.53 0.06 0.07 -99 0 0
9.50 -2.12 0.07 0.08 -2.23 0.06 0.07 -2.29 0.06 0.07 -2.46 0.07 0.08 -2.41 0.05 0.06 -2.50 0.06 0.07 -2.65 0.06 0.07
9.75 -2.21 0.06 0.07 -2.39 0.07 0.08 -2.48 0.07 0.08 -2.53 0.07 0.08 -2.54 0.06 0.06 -2.63 0.06 0.07 -2.78 0.07 0.08
10.00 -2.25 0.06 0.08 -2.45 0.07 0.09 -2.59 0.08 0.09 -2.61 0.08 0.09 -2.67 0.06 0.07 -2.74 0.07 0.08 -3.02 0.08 0.09
10.25 -2.35 0.07 0.08 -2.45 0.07 0.09 -2.73 0.08 0.10 -2.68 0.08 0.09 -2.76 0.06 0.07 -2.91 0.08 0.09 -3.21 0.09 0.10
10.50 -2.45 0.07 0.09 -2.52 0.08 0.09 -2.64 0.07 0.09 -2.71 0.08 0.09 -2.87 0.07 0.08 -3.07 0.09 0.10 -3.35 0.10 0.13
10.75 -2.55 0.08 0.09 -2.59 0.08 0.10 -2.72 0.08 0.10 -2.84 0.08 0.10 -3.03 0.08 0.09 -3.35 0.10 0.13 -3.74 0.13 0.17
11.00 -2.82 0.09 0.11 -2.93 0.10 0.13 -3.01 0.10 0.12 -3.12 0.10 0.13 -3.13 0.08 0.10 -3.54 0.12 0.16 -4.00 0.18 0.25
11.25 -3.32 0.10 0.13 -3.47 0.11 0.15 -3.62 0.11 0.15 -3.65 0.12 0.16 -3.56 0.10 0.13 -3.89 0.12 0.17 -4.14 0.17 0.28
11.50 -99 0 0 -99 0 0 -99 0 0 -4.99 0.30 0.41 -4.27 0.12 0.15 -4.41 0.14 0.19 -4.73 0.31 2.00

0 comments on commit b12cebe

Please sign in to comment.