Merge pull request #2 from calvin-sykes/tomczak13-gsmf

Add data and converter script for Tomczak et. al. (2013) GSMF
SWIFTSIM · Jan 17, 2020 · b12cebe · b12cebe
2 parents fe4aac5 + 686ecf5
commit b12cebe
Show file tree

Hide file tree

Showing 2 changed files with 188 additions and 0 deletions.
diff --git a/data/GalaxyStellarMassFunction/conversion/convertTomczak2013.py b/data/GalaxyStellarMassFunction/conversion/convertTomczak2013.py
@@ -0,0 +1,168 @@
+from velociraptor.observations.objects import ObservationalData
+
+import unyt
+import numpy as np
+import os
+import re
+import sys
+
+
+ORIGINAL_H = 0.70
+
+
+def handle_bad_values(parts):
+    """
+    Convert "missing data" values to NaNs and fix the "errors" on these values getting
+    misinterpreted as additional data points.
+
+    parts: list of strings obtained by splitting a line from the datafile on "   "
+    """
+    n_parts = len(parts)
+    to_remove = []
+    for i in range(n_parts):
+        if parts[i] == "-99":
+            parts[i] = "NaN 0 0"
+            to_remove.extend([i + 1, i + 2])
+        elif parts[i] == "":
+            to_remove.append(i)
+    for idx in reversed(to_remove):
+        del parts[idx]
+    return parts
+
+
+def load_file_and_split_by_z(raw_file_name):
+    """
+    Read the data file and do all the mucking around needed to extract lists of the
+    redshift and stellar mass bins for which the GSMF is tabulated, along with the
+    corresponding GSMF values and their errors.
+
+    raw_file_name: the file name of the raw data file to extract the GSMF from 
+    """
+    with open(raw_file_name, "r") as f:
+        lines = f.readlines()
+    # count lines that start with comment (#) or are blank
+    n_header_lines = sum(re.match("#.+|^\s*$", l) is not None for l in lines)
+
+    z_ranges = lines[4]
+    # Each range of redshifts is separated by two or more spaces
+    z_ranges = re.split("#?\s{2,}", z_ranges)[1:]
+    z_bins_arr = np.asarray([float(z_rge.split()[0]) for z_rge in z_ranges])
+
+    n_redshift_bins = len(z_bins_arr)
+    n_stellar_mass_bins = len(lines) - n_header_lines
+    gsmf_arr = np.zeros((n_redshift_bins, n_stellar_mass_bins, 3))
+
+    mass_bins_arr = np.zeros(n_stellar_mass_bins)
+
+    for ism, l in enumerate(lines[n_header_lines:]):
+        # The GSMF values for each redshift bin are separated by two or more spaces
+        parts = re.split("#?\s{2,}", l)
+        # The first number on each line is the stellar mass bin
+        mass_bins_arr[ism] = float(parts[0])
+
+        if any(p == "-99" or p == "" for p in parts):
+            # this indicates "bad value" and the errors will be given as "0"
+            # but we will incorrectly register them as further data values
+            # due to the way the file is formatted
+            # so we need to sort this out first
+            parts = handle_bad_values(parts)
+
+        for iz, part in enumerate(parts[1:]):
+            # each redshift bin has a 3-tuple of (GSMF, +ve error, -ve error)
+            phi, errp, errn = map(float, part.split())
+            gsmf_arr[iz, ism] = phi, errp, errn
+
+    return z_bins_arr, mass_bins_arr, gsmf_arr
+
+
+def process_for_redshift(z, mstar_bins, gsmf_at_z):
+    """
+    Output an HDF5 file containing the GSMF at a given redshift.
+
+    z: the redshift to produce the GSMF for. The given value corresponds to the lower
+    edge of a range in redshift, which has width 0.25 for z < 1.5 and 0.5 for z >= 1.5
+    mstar_bins: the list of stellar mass bins for which the GSMF is tabulated
+    gsmf_at_z: the slice of the GSMF array at the chosen redshift
+    """
+
+    processed = ObservationalData()
+
+    comment = (
+        f"Assuming Chabrier IMF, quoted redshift is lower bound of range. "
+        "h-corrected for SWIFT using Cosmology: {cosmology.name}."
+    )
+    citation = "Tomczak et al (2013)"
+    bibcode = "2014ApJ...783...85T"
+    name = "GSMF from ZFOURGE/CANDELS"
+    plot_as = "points"
+    redshift = z
+    h = cosmology.h
+
+    M = 10 ** mstar_bins * (h / ORIGINAL_H) ** (-2) * unyt.Solar_Mass
+    Phi = 10 ** gsmf_at_z[:, 0] * (h / ORIGINAL_H) ** 3 * unyt.Mpc ** (-3)
+    # y_scatter should be a 1xN or 2xN array describing offsets from
+    # the median point 'y'
+    # Errors are log error dz = 1/ln(10) dy/y
+    # We want dy = y ln(10) dz
+    Phi_err = (
+        (10 ** gsmf_at_z[:, 0][:, None] * np.log(10) * gsmf_at_z[:, [2, 1]]).T
+        * (h / ORIGINAL_H) ** 3
+        * unyt.Mpc ** (-3)
+    )
+
+    processed.associate_x(
+        M, scatter=None, comoving=True, description="Galaxy Stellar Mass"
+    )
+    processed.associate_y(Phi, scatter=Phi_err, comoving=True, description="Phi (GSMF)")
+    processed.associate_citation(citation, bibcode)
+    processed.associate_name(name)
+    processed.associate_comment(comment)
+    processed.associate_redshift(redshift)
+    processed.associate_plot_as(plot_as)
+    processed.associate_cosmology(cosmology)
+
+    return processed
+
+
+def stringify_z(z):
+    """
+    Eagle-style text formatting of redshift label.
+    Example: z=1.5 will be printed as z001p500.
+
+    z: The redshift to produce a label for
+    """
+    whole = int(z)
+    frac = int(1000 * (z - whole))
+    return f"z{whole:03d}p{frac:03d}"
+
+
+# Exec the master cosmology file passed as first argument
+# These lines are _required_ and you are required to use
+# the cosmology specified (this is an astropy.cosmology
+# instance)
+with open(sys.argv[1], "r") as handle:
+    exec(handle.read())
+
+input_filename = "../raw/Tomczak2013.txt"
+
+output_filename = "Tomczak2013_{}.hdf5"
+output_directory = "../"
+
+if not os.path.exists(output_directory):
+    os.mkdir(output_directory)
+
+# z_bins, mstar_bins are 1-D ndarrays containing the lower edges of the redshift bins,
+# and the log(stellar mass) bins respectively
+# gsmf is a 3-D ndarray with axes 0 and 1 corresponding to z and stellar mass bins
+# Axis 2 ranges from 0..2 and contains log(GSMF), and the +- errors respectively
+z_bins, mstar_bins, gsmf = load_file_and_split_by_z(input_filename)
+
+for iz, z in enumerate(z_bins):
+    processed = process_for_redshift(z, mstar_bins, gsmf[iz])
+
+    output_path = f"{output_directory}/{output_filename.format(stringify_z(z))}"
+
+    if os.path.exists(output_path):
+        os.remove(output_path)
+
+    processed.write(filename=output_path)
diff --git a/data/GalaxyStellarMassFunction/raw/Tomczak2013.txt b/data/GalaxyStellarMassFunction/raw/Tomczak2013.txt
@@ -0,0 +1,20 @@
+#Observational data for galaxy stellar mass function from Tomczak et al 2013
+#Chabrier (2003) IMF used
+#Omega_M = 0.3, Omega_lambda=0.7, h=0.7
+
+#               0.5 < z < 0.75   0.75 < z < 1.0   1.0 < z < 1.25   1.25 < z < 1.5   1.5 < z < 2.0    2.0 < z < 2.5    2.5 < z < 3.0
+#log(M/M_sun)   log(Phi) + -     log(Phi) + -     log(Phi) + -     log(Phi) + -     log(Phi) + -     log(Phi) + -     log(Phi) + -
+8.25            -1.53 0.06 0.07  -99   0    0     -99   0    0     -99   0    0     -99   0    0     -99   0    0     -99   0    0
+8.50            -1.60 0.05 0.06  -1.70 0.05 0.06  -99   0    0     -99   0    0     -99   0    0     -99   0    0     -99   0    0
+8.75            -1.76 0.06 0.06  -1.86 0.05 0.06  -1.99 0.06 0.06  -2.02 0.06 0.07  -99   0    0     -99   0    0     -99   0    0
+9.00            -1.86 0.06 0.07  -2.01 0.06 0.06  -2.14 0.06 0.07  -2.14 0.06 0.07  -2.20 0.05 0.06  -99   0    0     -99   0    0
+9.25            -2.00 0.06 0.07  -2.10 0.06 0.07  -2.24 0.06 0.07  -2.28 0.06 0.07  -2.31 0.05 0.06  -2.53 0.06 0.07  -99   0    0
+9.50            -2.12 0.07 0.08  -2.23 0.06 0.07  -2.29 0.06 0.07  -2.46 0.07 0.08  -2.41 0.05 0.06  -2.50 0.06 0.07  -2.65 0.06 0.07
+9.75            -2.21 0.06 0.07  -2.39 0.07 0.08  -2.48 0.07 0.08  -2.53 0.07 0.08  -2.54 0.06 0.06  -2.63 0.06 0.07  -2.78 0.07 0.08
+10.00           -2.25 0.06 0.08  -2.45 0.07 0.09  -2.59 0.08 0.09  -2.61 0.08 0.09  -2.67 0.06 0.07  -2.74 0.07 0.08  -3.02 0.08 0.09
+10.25           -2.35 0.07 0.08  -2.45 0.07 0.09  -2.73 0.08 0.10  -2.68 0.08 0.09  -2.76 0.06 0.07  -2.91 0.08 0.09  -3.21 0.09 0.10
+10.50           -2.45 0.07 0.09  -2.52 0.08 0.09  -2.64 0.07 0.09  -2.71 0.08 0.09  -2.87 0.07 0.08  -3.07 0.09 0.10  -3.35 0.10 0.13
+10.75           -2.55 0.08 0.09  -2.59 0.08 0.10  -2.72 0.08 0.10  -2.84 0.08 0.10  -3.03 0.08 0.09  -3.35 0.10 0.13  -3.74 0.13 0.17
+11.00           -2.82 0.09 0.11  -2.93 0.10 0.13  -3.01 0.10 0.12  -3.12 0.10 0.13  -3.13 0.08 0.10  -3.54 0.12 0.16  -4.00 0.18 0.25
+11.25           -3.32 0.10 0.13  -3.47 0.11 0.15  -3.62 0.11 0.15  -3.65 0.12 0.16  -3.56 0.10 0.13  -3.89 0.12 0.17  -4.14 0.17 0.28
+11.50           -99   0    0     -99   0    0     -99   0    0     -4.99 0.30 0.41  -4.27 0.12 0.15  -4.41 0.14 0.19  -4.73 0.31 2.00