Merge branch 'master' into ellipticity
yymao authored Feb 24, 2018
2 parents 1d9f445 + ded0cf1 commit d19f32a
Showing 25 changed files with 1,745 additions and 121 deletions.
8 changes: 7 additions & 1 deletion .travis.yml
@@ -6,12 +6,18 @@ python:
- "2.7"
- "3.6"

before_install: # camb requires gfortran to be at least gfortran-4.9
- sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
- sudo apt-get update -qq
- sudo apt-get install gfortran-4.9 -y
- sudo ln -s /usr/bin/gfortran-4.9 /usr/bin/gfortran

install:
- pip install .[full]
- pip install pylint nose

before_script:
- export CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep '^\(descqa.*\|tests\)/.\+\.py$')
- export CHANGED=$(git diff --diff-filter=d --name-only $TRAVIS_COMMIT_RANGE | grep '^\(descqa.*\|tests\)/.\+\.py$')
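  # --diff-filter=d excludes deleted files from $CHANGED, so later steps that
  # operate on the changed files (e.g. linting with the pylint installed
  # above) are not pointed at paths that no longer exist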

script:
- nosetests
114 changes: 114 additions & 0 deletions descqa/SizeDistribution.py
@@ -0,0 +1,114 @@
import os
from itertools import count
import numpy as np
from .base import BaseValidationTest, TestResult
from .plotting import plt
from .utils import get_opt_binpoints

__all__ = ['SizeDistribution']

class SizeDistribution(BaseValidationTest):
"""
    Validation test to check the slope of the size distribution at small sizes.
"""

#plotting constants
validation_color = 'black'
validation_marker = 'o'
default_markers = ['v', 's', 'd', 'H', '^', 'D', 'h', '<', '>', '.']
msize = 4 #marker-size
yaxis_xoffset = 0.02
yaxis_yoffset = 0.5

def __init__(self, **kwargs):
#pylint: disable=W0231
#validation data
validation_filepath = os.path.join(self.data_dir, kwargs['data_filename'])
self.validation_data = np.loadtxt(validation_filepath)

self.acceptable_keys = kwargs['possible_size_fields']

self._color_iterator = ('C{}'.format(i) for i in count())

def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
        # advance the color iterator so catalog colors stay consistent across tests
catalog_color = next(self._color_iterator)

# check catalog data for required quantities
key = catalog_instance.first_available(*self.acceptable_keys)
if not key:
            summary = 'Missing required quantity: ' + ' or '.join(['{}'] * len(self.acceptable_keys))
return TestResult(skipped=True, summary=summary.format(*self.acceptable_keys))

# get data
catalog_data = catalog_instance.get_quantities(key)
sizes = catalog_data[key]
        good_data_mask = np.isfinite(sizes)
        sizes = sizes[good_data_mask]
        positive_mask = sizes > 0
        if not positive_mask.all():
            print('Warning: some sizes were negative or zero; these are being masked')
        sizes = sizes[positive_mask]
min_sizes = np.min(sizes)
max_sizes = np.max(sizes)

        # Compute N(>size) and its slope at the small end.
        # The curve is roughly linear where N(>size) > 0.5*N_total, i.e. below
        # the median size, so fit those points.
        # Use ~20 points for the line fit, but compute the whole curve.
median = np.median(sizes)
n_bins = int(20*(max_sizes-min_sizes)/(median-min_sizes))
N, bin_edges = np.histogram(sizes, bins=n_bins)
sumM = np.histogram(sizes, weights=sizes, bins=bin_edges)[0]
sumM2 = np.histogram(sizes, weights=sizes**2, bins=bin_edges)[0]
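        # get_opt_binpoints estimates a representative abscissa for each bin from
        # the counts and the first two moments of the sizes within the bin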
size_pts = get_opt_binpoints(N, sumM, sumM2, bin_edges)
diff = size_pts[1:] - size_pts[:-1]
if not np.all(diff >= 0):
# Sparsely populated bins sometimes cause problems for
# get_opt_binpoints; replace with the dumb solution
size_pts = 0.5*(bin_edges[:-1]+bin_edges[1:])

mask = size_pts < median

        # Complementary cumulative counts N(>size), normalized so we can
        # compare datasets of different sizes
        cumul_N_norm = np.cumsum(N[::-1])[::-1] / float(N.sum())
data_slope, data_intercept = np.polyfit(size_pts[mask], cumul_N_norm[mask], 1)

# Compute the slope for the validation dataset in this size range.
# Copy the validation dataset so we can play with it
validation_data = self.validation_data.copy()
validation_mask = (validation_data[:, 0] > min_sizes) & (validation_data[:, 0] < median)
validation_data[:, 1] /= validation_data[validation_mask, 1][0]
validation_slope, _ = np.polyfit(validation_data[validation_mask, 0],
validation_data[validation_mask, 1], 1)

        # Plot a histogram of sizes; this is easier to read as log10(size).
        fig, (hist_ax, cumul_ax) = plt.subplots(1, 2)
        fig.subplots_adjust(wspace=0.4)
        hist_ax.hist(np.log10(sizes), color=catalog_color, edgecolor='black', alpha=0.75,
                     density=True, bins=20)
hist_ax.set_xlabel("Log10({})".format(key))
hist_ax.set_ylabel("dN/d log({})".format(key))

# plot the CDF and the line fit
cumul_ax.plot(size_pts, cumul_N_norm, color=catalog_color)
cumul_ax.plot(size_pts[mask], (data_intercept+data_slope*size_pts[mask]), color='gray')
cumul_ax.set_xscale('log')
cumul_ax.text(0.95, 0.96,
'validation=${:.3f}$\nslope=${:.3f}$'.format(validation_slope, data_slope),
horizontalalignment='right', verticalalignment='top',
transform=cumul_ax.transAxes)
cumul_ax.set_xlabel("{}".format(key))
        cumul_ax.set_ylabel("N(>{}) / N_total".format(key))

        output_filepath = os.path.join(output_dir, 'size_distribution_{}.txt'.format(catalog_name))
        with open(output_filepath, 'w') as f:
            f.write("# Slope, intercept\n")
            f.write("%7f %9f\n" % (data_slope, data_intercept))

fig.savefig(os.path.join(output_dir, 'size_distribution_{}.png'.format(catalog_name)))
plt.close(fig)
return TestResult(inspect_only=True)
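
The quantity fit above is the normalized complementary CDF, N(>size)/N_total, evaluated at the small-size end. A self-contained sketch of the same computation on synthetic data (the lognormal distribution, the seed, and all names below are illustrative, not part of the commit):

import numpy as np

rng = np.random.default_rng(42)
sizes = rng.lognormal(mean=0.0, sigma=0.5, size=10000)  # stand-in galaxy sizes

N, bin_edges = np.histogram(sizes, bins=50)
size_pts = 0.5 * (bin_edges[:-1] + bin_edges[1:])  # the "dumb" bin centers

# complementary CDF: fraction of objects larger than each size point
cumul_N_norm = np.cumsum(N[::-1])[::-1] / float(N.sum())

# fit a line below the median, where N(>size)/N_total > 0.5 and roughly linear
mask = size_pts < np.median(sizes)
slope, intercept = np.polyfit(size_pts[mask], cumul_N_norm[mask], 1)
print('slope = {:.3f}, intercept = {:.3f}'.format(slope, intercept))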


175 changes: 175 additions & 0 deletions descqa/SizeStellarMassLuminosity.py
@@ -0,0 +1,175 @@
from __future__ import print_function, division, unicode_literals, absolute_import
import os
import numpy as np
from GCR import GCRQuery
from scipy import interpolate
from scipy.stats import binned_statistic

from .base import BaseValidationTest, TestResult
from .plotting import plt


__all__ = ['SizeStellarMassLuminosity']


def redshift2dist(cosmology):
z = np.arange(0, 5.1, 0.5)
comov_d = cosmology.comoving_distance(z).to('kpc').value
spl = interpolate.splrep(z, comov_d)
return spl
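
# Note (illustrative, not part of the commit): interpolate.splev(z, spl)
# evaluates this spline to recover the comoving distance in kpc at arbitrary z;
# the test divides by (1 + z) to convert angular sizes to physical ones via the
# angular-diameter distance D_A = D_C / (1 + z).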


class SizeStellarMassLuminosity(BaseValidationTest):
"""
    Validation test of galaxy size as a function of stellar mass / luminosity and redshift
"""
_ARCSEC_TO_RADIAN = np.pi / 180. / 3600.

def __init__(self, **kwargs):
#pylint: disable=W0231
self.kwargs = kwargs
self.observation = kwargs['observation']
self.possible_mag_fields = kwargs['possible_mag_fields']
self.test_name = kwargs['test_name']
self.data_label = kwargs['data_label']
self.z_bins = kwargs['z_bins']
self.output_filename_template = kwargs['output_filename_template']
self.label_template = kwargs['label_template']
self.fig_xlabel = kwargs['fig_xlabel']
self.fig_ylabel = kwargs['fig_ylabel']

validation_filepath = os.path.join(self.data_dir, kwargs['data_filename'])
self.validation_data = np.genfromtxt(validation_filepath)

@staticmethod
def ConvertAbsMagLuminosity(AbsM, band):
'''AbsM: absolute magnitude, band: filter'''
AbsM = np.asarray(AbsM)

bands = {'U':5.61, 'B':5.48, 'V':4.83, 'R':4.42, 'I':4.08,
'J':3.64, 'H':3.32, 'K':3.28, 'g':5.33, 'r':4.67,
'i':4.48, 'z':4.42, 'F300W':6.09, 'F450W':5.32, 'F555W':4.85,
'F606W':4.66, 'F702W':4.32, 'F814W':4.15, 'CFHT_U':5.57,
'CFHT_B':5.49, 'CFHT_V':4.81, 'CFHT_R':4.44, 'CFHT_I':4.06,
'NIRI_J':3.64, 'NIRI_H':3.33, 'NIRI_K':3.29}

        if band in bands:
            AbsSun = bands[band]
        else:
            raise ValueError('Filter {} not implemented'.format(band))

logL = (AbsSun - AbsM) / 2.5 #unit of sun
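        # e.g. (illustrative) AbsM = -21 in the r band gives
        # logL = (4.67 + 21) / 2.5 ≈ 10.27, i.e. L ≈ 1.9e10 L_sun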
return logL

def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
'''
        Loop over redshift bins and make plots
'''
# load catalog data
spl = redshift2dist(catalog_instance.cosmology)

colnames = dict()
colnames['z'] = catalog_instance.first_available('redshift', 'redshift_true')
colnames['mag'] = catalog_instance.first_available(*self.possible_mag_fields)
if self.observation == 'onecomp':
colnames['size'] = catalog_instance.first_available('size', 'size_true')
elif self.observation == 'twocomp':
colnames['size_bulge'] = catalog_instance.first_available('size_bulge', 'size_bulge_true')
colnames['size_disk'] = catalog_instance.first_available('size_disk', 'size_disk_true')

        if not all(colnames.values()):
            return TestResult(skipped=True, summary='Missing requested quantities')

        # keep only objects for which all requested quantities are finite
        filters = [(np.isfinite, c) for c in colnames.values()]

#Select objects within maximum and minimum redshift of all the bins
filters.extend((
'{} < {}'.format(colnames['z'], max(z_bin['z_max'] for z_bin in self.z_bins)),
'{} >= {}'.format(colnames['z'], min(z_bin['z_min'] for z_bin in self.z_bins)),
))
catalog_data = catalog_instance.get_quantities(list(colnames.values()), filters=filters)
catalog_data = {k: catalog_data[v] for k, v in colnames.items()}

fig, axes = plt.subplots(2, 3, figsize=(9, 6), sharex=True, sharey=True)
        try:
            for i, z_bin in enumerate(self.z_bins):
                # lay the panels out left-to-right, top-to-bottom
                ax = axes[i // 3, i % 3]
# filter catalog data for this bin
filters = [
'z < {}'.format(z_bin['z_max']),
'z >= {}'.format(z_bin['z_min']),
]

catalog_data_this = GCRQuery(*filters).filter(catalog_data)
if len(catalog_data_this['z']) == 0:
continue
z_mean = (z_bin['z_max'] + z_bin['z_min']) / 2.
output_filepath = os.path.join(output_dir, self.output_filename_template.format(z_bin['z_min'], z_bin['z_max']))
colors = ['r', 'b']
default_L_bin_edges = np.array([9, 9.5, 10, 10.5, 11, 11.5])
default_L_bins = (default_L_bin_edges[1:] + default_L_bin_edges[:-1]) / 2.
if self.observation == 'onecomp':
logL_G = self.ConvertAbsMagLuminosity(catalog_data_this['mag'], 'g')
size_kpc = catalog_data_this['size'] * self._ARCSEC_TO_RADIAN * interpolate.splev(catalog_data_this['z'], spl) / (1 + catalog_data_this['z'])
binned_size_kpc = binned_statistic(logL_G, size_kpc, bins=default_L_bin_edges, statistic='mean')[0]
binned_size_kpc_err = binned_statistic(logL_G, size_kpc, bins=default_L_bin_edges, statistic='std')[0]

np.savetxt(output_filepath, np.transpose((default_L_bins, binned_size_kpc, binned_size_kpc_err)))

validation_this = self.validation_data[(self.validation_data[:,0] < z_mean + 0.25) & (self.validation_data[:,0] > z_mean - 0.25)]

ax.semilogy(validation_this[:,1], 10**validation_this[:,2], label=self.label_template.format(z_bin['z_min'], z_bin['z_max']))
ax.fill_between(validation_this[:,1], 10**validation_this[:,3], 10**validation_this[:,4], lw=0, alpha=0.2)
ax.errorbar(default_L_bins, binned_size_kpc, binned_size_kpc_err, marker='o', ls='')
elif self.observation == 'twocomp':
logL_I = self.ConvertAbsMagLuminosity(catalog_data_this['mag'], 'i')
arcsec_to_kpc = self._ARCSEC_TO_RADIAN * interpolate.splev(catalog_data_this['z'], spl) / (1 + catalog_data_this['z'])

binned_bulgesize_kpc = binned_statistic(logL_I, catalog_data_this['size_bulge'] * arcsec_to_kpc, bins=default_L_bin_edges, statistic='mean')[0]
binned_bulgesize_kpc_err = binned_statistic(logL_I, catalog_data_this['size_bulge'] * arcsec_to_kpc, bins=default_L_bin_edges, statistic='std')[0]
binned_disksize_kpc = binned_statistic(logL_I, catalog_data_this['size_disk'] * arcsec_to_kpc, bins=default_L_bin_edges, statistic='mean')[0]
binned_disksize_kpc_err = binned_statistic(logL_I, catalog_data_this['size_disk'] * arcsec_to_kpc, bins=default_L_bin_edges, statistic='std')[0]
binned_bulgesize_kpc = np.nan_to_num(binned_bulgesize_kpc)
binned_bulgesize_kpc_err = np.nan_to_num(binned_bulgesize_kpc_err)
binned_disksize_kpc = np.nan_to_num(binned_disksize_kpc)
binned_disksize_kpc_err = np.nan_to_num(binned_disksize_kpc_err)
np.savetxt(output_filepath, np.transpose((default_L_bins, binned_bulgesize_kpc, binned_bulgesize_kpc_err, binned_disksize_kpc, binned_disksize_kpc_err)))

validation_this = self.validation_data[(self.validation_data[:,0] < z_mean + 0.25) & (self.validation_data[:,0] > z_mean - 0.25)]

ax.text(11, 0.3, self.label_template.format(z_bin['z_min'], z_bin['z_max']))
ax.semilogy(validation_this[:,1], validation_this[:,2], label='Bulge', color=colors[0])
ax.fill_between(validation_this[:,1], validation_this[:,2] + validation_this[:,4], validation_this[:,2] - validation_this[:,4], lw=0, alpha=0.2, facecolor=colors[0])
ax.semilogy(validation_this[:,1] + 0.2, validation_this[:,3], label='Disk', color=colors[1])
ax.fill_between(validation_this[:,1] + 0.2, validation_this[:,3] + validation_this[:,5], validation_this[:,3] - validation_this[:,5], lw=0, alpha=0.2, facecolor=colors[1])

ax.errorbar(default_L_bins, binned_bulgesize_kpc, binned_bulgesize_kpc_err, marker='o', ls='', c=colors[0])
ax.errorbar(default_L_bins+0.2, binned_disksize_kpc, binned_disksize_kpc_err, marker='o', ls='', c=colors[1])
ax.set_xlim([9, 13])
ax.set_ylim([1e-1, 25])
                ax.set_yscale('log', nonpositive='clip')
del catalog_data_this


ax.legend(loc='best')

fig.add_subplot(111, frameon=False)
            # hide ticks and tick labels of the enclosing frame axes
            plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)
plt.grid(False)
plt.xlabel(self.fig_xlabel)
plt.ylabel(self.fig_ylabel)
fig.subplots_adjust(hspace=0, wspace=0.2)
fig.suptitle('{} ($M_V$) vs. {}'.format(catalog_name, self.data_label), fontsize='medium', y=0.93)
finally:
fig.savefig(os.path.join(output_dir, '{:s}.png'.format(self.test_name)), bbox_inches='tight')
plt.close(fig)

#TODO: calculate summary statistics
return TestResult(inspect_only=True)
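
For concreteness, a hedged sketch of the arcsec-to-kpc conversion used above, with astropy standing in for catalog_instance.cosmology (the cosmological parameters and the z = 0.75, 0.4-arcsec galaxy are made up for illustration):

import numpy as np
from astropy.cosmology import FlatLambdaCDM

ARCSEC_TO_RADIAN = np.pi / 180. / 3600.
cosmo = FlatLambdaCDM(H0=71, Om0=0.265)  # hypothetical cosmology
z, size_arcsec = 0.75, 0.4               # hypothetical galaxy

# physical size = angular size * angular-diameter distance, D_A = D_C / (1 + z)
comov_d_kpc = cosmo.comoving_distance(z).to('kpc').value
size_kpc = size_arcsec * ARCSEC_TO_RADIAN * comov_d_kpc / (1 + z)
print('{:.2f} kpc'.format(size_kpc))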
16 changes: 16 additions & 0 deletions descqa/configs/shear.yaml
@@ -0,0 +1,16 @@
subclass_name: shear_test.ShearTest
z: 'redshift_true'
ra: 'ra'
dec: 'dec'
e1: 'shear_1'
e2: 'shear_2'
kappa: 'convergence'
nbins: 6
min_sep: 2.5
max_sep: 200
sep_units: 'arcmin'
bin_slop: 0.07
zlo: 0.7
zhi: 0.8
do_jackknife: False
N_clust: 10
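
nbins, min_sep, max_sep, sep_units, and bin_slop mirror the binning options of a treecorr two-point estimator. A minimal sketch of how shear_test.ShearTest (not shown in this diff) might consume them, assuming the config is read with PyYAML and passed through unchanged:

import yaml
import treecorr

with open('descqa/configs/shear.yaml') as f:
    cfg = yaml.safe_load(f)

# shear-shear correlation, binned in angular separation as the config specifies
gg = treecorr.GGCorrelation(nbins=cfg['nbins'], min_sep=cfg['min_sep'],
                            max_sep=cfg['max_sep'], sep_units=cfg['sep_units'],
                            bin_slop=cfg['bin_slop'])
# gg.process(treecorr.Catalog(ra=ra, dec=dec, g1=e1, g2=e2,
#                             ra_units='deg', dec_units='deg'))
# xip, xim = gg.xip, gg.xim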
30 changes: 30 additions & 0 deletions descqa/configs/size_Mandelbaum2014_BD.yaml
@@ -0,0 +1,30 @@
subclass_name: SizeStellarMassLuminosity.SizeStellarMassLuminosity

#observation: either onecomp (single size) or twocomp (separate bulge and disk sizes)
observation: twocomp

possible_mag_fields:
- Mag_true_i_lsst_z0
- Mag_true_i_sdss_z0
- Mag_true_i_des_z0

mag_bin_separation: 1

output_filename_template: 'size_COSMOS_z_{}_{}.dat'

label_template: '${} < z < {}$'

data_filename: 'size/Mandelbaum2014_LumF814W_size_bulge_disk.txt'
data_label: 'Mandelbaum+2014: F814W comparing mean and scatter'

z_bins:
- {z_min: 0.0, z_max: 0.5}
- {z_min: 0.5, z_max: 1.0}
- {z_min: 1.0, z_max: 1.5}

fig_xlabel: '$(L/L_{\odot})_{I}$'
fig_ylabel: '$\log_{10}(R_e)$ (kpc)'


description: |
  Compare the evolution of bulge and disk sizes as a function of LSST i-band magnitude and redshift to Mandelbaum et al. (2014) HST COSMOS F814W observations
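
For reference, the two templates above are filled per redshift bin, as in SizeStellarMassLuminosity.run_on_single_catalog (a small illustration):

z_bins = [{'z_min': 0.0, 'z_max': 0.5},
          {'z_min': 0.5, 'z_max': 1.0},
          {'z_min': 1.0, 'z_max': 1.5}]
for zb in z_bins:
    print('size_COSMOS_z_{}_{}.dat'.format(zb['z_min'], zb['z_max']))  # output file
    print('${} < z < {}$'.format(zb['z_min'], zb['z_max']))            # plot label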
9 changes: 9 additions & 0 deletions descqa/configs/size_dist_COSMOS.yaml
@@ -0,0 +1,9 @@
subclass_name: SizeDistribution.SizeDistribution

possible_size_fields:
- size
- size_true

data_filename: 'size_dist/COSMOS_Great3_sample_size_distribution.txt'

description: Compare slope of N(size) to the same slope in the Great3 COSMOS postage stamps