diff --git a/py/redrock/archetypes.py b/py/redrock/archetypes.py index 2d6ed694..eb47a0b3 100644 --- a/py/redrock/archetypes.py +++ b/py/redrock/archetypes.py @@ -17,7 +17,7 @@ from .utils import transmission_Lyman -from .zscan import per_camera_coeff_with_least_square, per_camera_coeff_with_least_square_batch_cpu, per_camera_coeff_with_least_square_batch_gpu +from .zscan import per_camera_coeff_with_least_square_batch class Archetype(): """Class to store all different archetypes from the same spectype. @@ -202,8 +202,8 @@ def nearest_neighbour_model(self, target,weights,flux,wflux,dwave,z, n_nearest, tdata[hs] = np.append(tdata[hs], legendre[hs].transpose()[None,:,:], axis=2) nbasis = tdata[hs].shape[2] if per_camera: - #Batch placeholder that right now loops over each arch but will be GPU accelerated - (zzchi2, zzcoeff) = per_camera_coeff_with_least_square_batch_cpu(spectra, tdata, weights, flux, wflux, nleg, 1, method='bvls', n_nbh=1) + #Use CPU mode since small tdata + (zzchi2, zzcoeff) = per_camera_coeff_with_least_square_batch(spectra, tdata, weights, flux, wflux, nleg, 1, method='bvls', n_nbh=n_nearest, use_gpu=False) else: #Use CPU mode for calc_zchi2 since small tdata (zzchi2, zzcoeff) = calc_zchi2_batch(spectra, tdata, weights, flux, wflux, 1, nbasis, use_gpu=False) @@ -298,9 +298,9 @@ def get_best_archetype(self,target,weights,flux,wflux,dwave,z, per_camera, n_nea nbasis = tdata[hs].shape[2] if per_camera: if (use_gpu): - (zzchi2, zzcoeff) = per_camera_coeff_with_least_square_batch_gpu(target, tdata, gpuweights, gpuflux, gpuwflux, nleg, self._narch, method=solve_method, n_nbh=1, use_gpu=use_gpu) + (zzchi2, zzcoeff) = per_camera_coeff_with_least_square_batch(target, tdata, gpuweights, gpuflux, gpuwflux, nleg, self._narch, method=solve_method, n_nbh=1, use_gpu=use_gpu) else: - (zzchi2, zzcoeff) = per_camera_coeff_with_least_square_batch_cpu(target, tdata, weights, flux, wflux, nleg, self._narch, method=solve_method, n_nbh=1) + (zzchi2, zzcoeff) = per_camera_coeff_with_least_square_batch(target, tdata, weights, flux, wflux, nleg, self._narch, method=solve_method, n_nbh=1, use_gpu=use_gpu) else: if (use_gpu): (zzchi2, zzcoeff) = calc_zchi2_batch(spectra, tdata, gpuweights, gpuflux, gpuwflux, self._narch, nbasis, use_gpu=use_gpu) diff --git a/py/redrock/zscan.py b/py/redrock/zscan.py index 061b8311..b8c127f2 100644 --- a/py/redrock/zscan.py +++ b/py/redrock/zscan.py @@ -238,38 +238,7 @@ def calc_zchi2_one(spectra, weights, flux, wflux, tdata): return zchi2, zcoeff -def Tb_for_archetype(spectra, tdata, nbasis, n_nbh, nleg): - """ - Parameters - --------------------- - - spectra (object): target spectra object - tdata (dict): template data for model fit - nbasis (int): number of templates - n_nbh (int): number of nearest best archetypes - nleg (int): number of Legendre polynomials - - Returns - -------------------- - Template matrix after applying resolution matrix - - """ - Tb = list() - for i,s in enumerate(spectra): - key = s.wavehash - tdata2 = np.zeros((tdata[key].shape[0],nbasis)) - for k in range(n_nbh): - tdata2[:,k] = tdata[key][:,k] # these are nearest archetype - if nleg>0: - for k in range(n_nbh, n_nbh+nleg): - tdata2[:,k+nleg*i] = tdata[key][:,k] # Legendre polynomials terms - Tb.append(s.Rcsr.dot(tdata2)) # this is perhaps the slowest step: - Tb = np.vstack(Tb) - return Tb - - -def per_camera_coeff_with_least_square(spectra, tdata, nleg, method=None, n_nbh=None): - +def per_camera_coeff_with_least_square_batch(target, tdata, weights, flux, wflux, nleg, narch, method=None, n_nbh=None, use_gpu=False): """ This function calculates coefficients for archetype mode in each camera using normal linear algebra matrix solver or BVLS (bounded value least square) method @@ -280,106 +249,42 @@ def per_camera_coeff_with_least_square(spectra, tdata, nleg, method=None, n_nbh= Parameters --------------------- - spectra (object): target spectra object - tdata (dict): template data for model fit + target (object): target object + tdata (dict): template data for model fit for ALL archetypes + weights (array): concatenated spectral weights (ivar). + flux (array): concatenated flux values. + wflux (array): concatenated weighted flux values. nleg (int): number of Legendre polynomials + narch (int): number of archetypes method (string): 'PCA' or 'bvls' n_nbh (int): number of nearest best archetypes + use_gpu (bool): use GPU or not Returns -------------------- coefficients and chi2 """ - # this function on an average takes 0.0007 sec for one archetye - #start = time.time() + ### TODO - implement BVLS on GPU ncam = 3 # number of cameras in DESI: b, r, z - - flux = np.concatenate([s.flux for s in spectra]) - weights = np.concatenate([s.ivar for s in spectra]) - wflux = flux*weights + spectra = target.spectra nbasis = n_nbh+nleg*ncam # n_nbh : for actual physical archetype(s), nleg: number of legendre polynomials, ncamera: number of cameras - - #linear templates in each camera (only the Legendre terms will vary per camera, the lead archetype(s) will remain same for entire spectra) - - Tb = Tb_for_archetype(spectra, tdata, nbasis, n_nbh, nleg) - M = Tb.T.dot(np.multiply(weights[:,None], Tb)) - y = Tb.T.dot(wflux) ret_zcoeff= {'alpha':[], 'b':[], 'r':[], 'z':[]} - # PCA method will use numpy Linear Algebra method to solve the best fit linear equation - if method=='pca': - try: - zcoeff = solve_matrices(M, y, solve_algorithm='PCA', use_gpu=False) - except np.linalg.LinAlgError: - return 9e+99, np.zeros(nbasis) - - # BVLS implementation with scipy - if method=='bvls': + #Setup dict of solver args to pass bounds to solver + solver_args = dict() + if (method == 'bvls'): #only positive coefficients are allowed for the archetypes bounds = np.zeros((2, nbasis)) bounds[0][n_nbh:]=-np.inf #constant and slope terms in archetype method (can be positive or negative) - bounds[1] = np.inf - try: - res = lsq_linear(M, y, bounds=bounds, method='bvls') - zcoeff = res.x - except np.linalg.LinAlgError: - return 9e+99, np.zeros(nbasis) - - model = Tb.dot(zcoeff) - zchi2 = np.dot((flux - model)**2, weights) - - # saving leading archetype coefficients in correct order - ret_zcoeff['alpha'] = [zcoeff[k] for k in range(n_nbh)] # archetype coefficient(s) - - if nleg>=1: - split_coeff = np.split(zcoeff[n_nbh:], ncam) # n_camera = 3 - # In target spectra redrock saves values as 'b', 'z', 'r'. - # So just re-ordering them here to 'b', 'r', 'z' for easier reading - old_coeff = {band: split_coeff[i] for i, band in enumerate(['b', 'z', 'r'])} - - for band in ['b', 'r', 'z']:# 3 cameras - ret_zcoeff[band] = old_coeff[band] - - coeff = np.concatenate(list(ret_zcoeff.values())) - #print(f'{time.time()-start} [sec] took for per camera BVLS method\n') - return zchi2, coeff - -def per_camera_coeff_with_least_square_batch_cpu(target, tdata, weights, flux, wflux, nleg, narch, method=None, n_nbh=None, use_gpu=False): - ### TODO - implement BVLS as a lin alg method instead of PCA and then can use calc_zchi2_batch! - ### PLACEHOLDER for algorithm that will be GPU accelerated - ncam = 3 # number of cameras in DESI: b, r, z - spectra = target.spectra - #nleg = target._nleg - #legendre = target.legendre(nleg=nleg, use_gpu=use_gpu) #Get previously calculated legendre - - zzchi2 = np.zeros(narch, dtype=np.float64) - zzcoeff = np.zeros((narch, 1+ncam*(nleg)), dtype=np.float64) - - nbasis = n_nbh+nleg*ncam # n_nbh : for actual physical archetype(s), nleg: number of legendre polynomials, ncamera: number of cameras - tdata_one = dict() - for i in range(narch): - for hs in tdata: - tdata_one[hs] = tdata[hs][i,:,:] - if (use_gpu): - tdata_one[hs] = tdata_one[hs].get() - zzchi2[i], zzcoeff[i]= per_camera_coeff_with_least_square(spectra, tdata_one, nleg, method=method, n_nbh=1) - return zzchi2, zzcoeff - - -def per_camera_coeff_with_least_square_batch_gpu(target, tdata, weights, flux, wflux, nleg, narch, method=None, n_nbh=None, use_gpu=False): - ### TODO - implement BVLS as a lin alg method instead of PCA and then can use calc_zchi2_batch! - ### PLACEHOLDER for algorithm that will be GPU accelerated - ncam = 3 # number of cameras in DESI: b, r, z - spectra = target.spectra - #nleg = target._nleg - #legendre = target.legendre(nleg=nleg, use_gpu=use_gpu) #Get previously calculated legendre - - nbasis = n_nbh+nleg*ncam # n_nbh : for actual physical archetype(s), nleg: number of legendre polynomials, ncamera: number of cameras - ret_zcoeff= {'alpha':[], 'b':[], 'r':[], 'z':[]} + bounds[1] = np.inf + solver_args['bounds'] = bounds + #Use branching options because GPU is faster in batch in 3d + #but due to timing weirdness in numpy, CPU is faster looping over + #narch and calling calc_zchi2_batch one arch at a time. if (use_gpu): #Use batch 3d array on GPU with CuPy to calculate new tdata array for i, hs in enumerate(tdata): @@ -387,30 +292,24 @@ def per_camera_coeff_with_least_square_batch_gpu(target, tdata, weights, flux, w tdata2[:,:,:n_nbh] = tdata[hs][:,:,:n_nbh] tdata2[:,:,n_nbh+i*nleg:n_nbh+(i+1)*nleg] = tdata[hs][:,:,n_nbh:] tdata[hs] = tdata2 + (zzchi2, zzcoeff) = calc_zchi2_batch(spectra, tdata, weights, flux, wflux, narch, nbasis, solve_matrices_algorithm=method.upper(), solver_args=solver_args, use_gpu=use_gpu) else: - #TODO - bizarre that this is faster than above for CPU - for i, hs in enumerate(tdata): - tdata_new = np.empty_like(tdata[hs], shape=(tdata[hs].shape[0], tdata[hs].shape[1], nbasis)) - for j in range(narch): - tdata2 = np.zeros_like(tdata[hs], shape=(tdata[hs].shape[1], nbasis)) + #Create zzchi2, zcoeff, and tdata2 dict here + tdata2 = dict() + zzchi2 = np.zeros(narch, dtype=np.float64) + zzcoeff = np.zeros((narch, n_nbh+ncam*(nleg)), dtype=np.float64) + #Loop over all arch + for j in range(narch): + #Create a 2d array in each color for tdata2 then add 3rd dimension of rank 1 + for i, hs in enumerate(tdata): + tdata2[hs] = np.zeros((tdata[hs].shape[1], nbasis)) for k in range(n_nbh): - tdata2[:,k] = tdata[hs][j,:,k] # these are nearest archetype + tdata2[hs][:,k] = tdata[hs][j,:,k] # these are nearest archetype if nleg>0: for k in range(n_nbh, n_nbh+nleg): - tdata2[:,k+nleg*i] = tdata[hs][j,:,k] # Legendre polynomials terms - tdata_new[j,:,:] = tdata2 - tdata[hs] = tdata_new - - #Setup dict of solver args to pass bounds to solver - solver_args = dict() - if (method == 'bvls'): - #only positive coefficients are allowed for the archetypes - bounds = np.zeros((2, nbasis)) - bounds[0][n_nbh:]=-np.inf #constant and slope terms in archetype method (can be positive or negative) - bounds[1] = np.inf - solver_args['bounds'] = bounds - - (zzchi2, zzcoeff) = calc_zchi2_batch(spectra, tdata, weights, flux, wflux, narch, nbasis, solve_matrices_algorithm=method.upper(), solver_args=solver_args, use_gpu=use_gpu) + tdata2[hs][:,k+nleg*i] = tdata[hs][j,:,k] # Legendre polynomials terms + tdata2[hs] = tdata2[hs][None,:,:] + zzchi2[j], zzcoeff[j] = calc_zchi2_batch(spectra, tdata2, weights, flux, wflux, 1, nbasis, solve_matrices_algorithm=method.upper(), solver_args=solver_args, use_gpu=use_gpu) # saving leading archetype coefficients in correct order ret_zcoeff['alpha'] = [zzcoeff[:,k] for k in range(n_nbh)] # archetype coefficient(s)