diff --git a/lib/Biodiverse/Indices.pm b/lib/Biodiverse/Indices.pm index 8de29f498..2892b286f 100644 --- a/lib/Biodiverse/Indices.pm +++ b/lib/Biodiverse/Indices.pm @@ -15,6 +15,7 @@ use List::MoreUtils qw /uniq/; use List::Util qw /sum/; use English ( -no_match_vars ); use Ref::Util qw { :all }; +use JSON::MaybeXS; #use MRO::Compat; use Class::Inspector; @@ -96,7 +97,7 @@ sub get_calculations { my %calculations; - my $list = Class::Inspector->methods( blessed $self); + my $list = Class::Inspector->methods( blessed $self ); foreach my $method ( grep { $_ =~ /^calc_/ } @$list ) { next if $method =~ /calc_abc\d?$/; # skip calc_abc1,2&3 @@ -355,6 +356,36 @@ sub get_calculation_metadata_as_wiki { return $html; } +sub get_calculation_metadata { + my $self = shift; + + if (!blessed $self) { + state $default_bd = Biodiverse::BaseData->new ( + NAME => 'for indices', + CELL_SIZES => [1,1], + ); + $self = __PACKAGE__->new (BASEDATA_REF => $default_bd); + } + + my %calculations = $self->get_calculations_as_flat_hash(); + + my %calculation_hash; + foreach my $calc_sub ( sort keys %calculations ) { + my $ref = $self->get_metadata( sub => $calc_sub ); + $calculation_hash{$calc_sub} = $ref; + } + + return wantarray ? 
%calculation_hash : \%calculation_hash; +} + +sub get_calculation_metadata_as_json { + my $self = shift; + my $metadata = $self->get_calculation_metadata; + my $json_obj = JSON::MaybeXS::JSON()->new; + $json_obj->convert_blessed(1); + return $json_obj->encode($metadata); +} + # now we have moved to github sub get_calculation_metadata_as_markdown { my $self = shift; @@ -1303,8 +1334,7 @@ sub get_ratio_indices { foreach my $calculations ( keys %$list ) { my $meta = $self->get_metadata( sub => $calculations ); INDEX: - foreach my $index ( keys %{ $meta->get_indices } ) { - next INDEX if !$meta->get_index_is_ratio($index); + foreach my $index ( grep {$meta->get_index_is_ratio($_)} keys %{ $meta->get_indices } ) { $indices{$index} = $meta->get_index_description($index); } } @@ -1348,6 +1378,32 @@ sub index_is_divergent { return !!$hash->{$args{index} // ''}; } +sub get_index_bounds { + my ($self, %args) = @_; + my $index = $args{index}; + + # return undef if !$self->index_is_scalar(index => $index); + + my $index_source = $self->get_index_source(index => $index); + my $meta = $self->get_metadata( sub => $index_source ); + + my $bounds = $meta->get_index_bounds ($index); + + return $bounds; +} + +sub index_distribution_is_valid { + my $self = shift; + my %args = @_; + my $index = $args{index}; + + my $index_source = $self->get_index_source(index => $index); + my $meta = $self->get_metadata( sub => $index_source ); + + return $meta->index_distribution_is_valid ($index); +} + + sub get_valid_calculations_to_run { my $self = shift; diff --git a/lib/Biodiverse/Indices/Endemism.pm b/lib/Biodiverse/Indices/Endemism.pm index dccae57cb..6388ff7a4 100644 --- a/lib/Biodiverse/Indices/Endemism.pm +++ b/lib/Biodiverse/Indices/Endemism.pm @@ -22,6 +22,7 @@ sub get_metadata_calc_endemism_central_normalised { calc_elements_used }], uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { ENDC_CWE_NORM => { description => 'Corrected 
weighted endemism normalised by groups', @@ -72,16 +73,18 @@ sub get_metadata_calc_endemism_whole_normalised { uses_nbr_lists => 1, # how many sets of lists it must have indices => { ENDW_CWE_NORM => { - description => 'Corrected weighted endemism normalised by groups', - formula => [ + description => 'Corrected weighted endemism normalised by groups', + formula => [ '= \frac{ENDW\_CWE}{EL\_COUNT\_ALL}', ], + distribution => 'unit_interval', }, ENDW_WE_NORM => { description => 'Weighted endemism normalised by groups', formula => [ '= \frac{ENDW\_WE}{EL\_COUNT\_ALL}', ], + distribution => 'nonnegative', }, }, ); # add to if needed @@ -153,6 +156,7 @@ sub get_metadata_calc_endemism_central { pre_calc => [qw /_calc_endemism_central/], reference => $ref, uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { ENDC_CWE => { description => 'Corrected weighted endemism', @@ -160,6 +164,7 @@ sub get_metadata_calc_endemism_central { formula => [ '= \frac{ENDC\_WE}{ENDC\_RICHNESS}', ], + distribution => 'unit_interval', }, ENDC_WE => { description => 'Weighted endemism', @@ -547,12 +552,14 @@ sub get_metadata_calc_endemism_whole { type => 'Endemism', pre_calc => '_calc_endemism_whole', uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { ENDW_CWE => { description => 'Corrected weighted endemism', formula => [ '= \frac{ENDW\_WE}{ENDW\_RICHNESS}', ], + distribution => 'unit_interval', }, ENDW_WE => { description => 'Weighted endemism', @@ -597,7 +604,7 @@ sub get_metadata_calc_endemism_whole { 't', ' across the data set (the number of groups it is found in, ' . 'unless the range is specified at import).' 
- ] + ], }, }, ); @@ -771,12 +778,13 @@ sub get_metadata_calc_endemism_absolute_lists { type => 'Endemism', pre_calc => ['_calc_endemism_absolute'], uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { - END_ABS1_LIST => { + END_ABS1_LIST => { description => 'List of labels entirely endemic to neighbour set 1', type => 'list', }, - END_ABS2_LIST => { + END_ABS2_LIST => { description => 'List of labels entirely endemic to neighbour set 1', type => 'list', }, @@ -811,6 +819,7 @@ sub get_metadata_calc_endemism_absolute { type => 'Endemism', pre_calc => ['_calc_endemism_absolute'], uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { END_ABS1 => { description => 'Count of labels entirely endemic to neighbour set 1', @@ -822,13 +831,16 @@ sub get_metadata_calc_endemism_absolute { description => 'Count of labels entirely endemic to neighbour sets 1 and 2 combined', }, END_ABS1_P => { - description => 'Proportion of labels entirely endemic to neighbour set 1', + description => 'Proportion of labels entirely endemic to neighbour set 1', + distribution => 'unit_interval', }, END_ABS2_P => { - description => 'Proportion of labels entirely endemic to neighbour set 2', + description => 'Proportion of labels entirely endemic to neighbour set 2', + distribution => 'unit_interval', }, END_ABS_ALL_P => { - description => 'Proportion of labels entirely endemic to neighbour sets 1 and 2 combined', + description => 'Proportion of labels entirely endemic to neighbour sets 1 and 2 combined', + distribution => 'unit_interval', }, }, ); # add to if needed diff --git a/lib/Biodiverse/Indices/GroupProperties.pm b/lib/Biodiverse/Indices/GroupProperties.pm index bd683b470..692b1672e 100644 --- a/lib/Biodiverse/Indices/GroupProperties.pm +++ b/lib/Biodiverse/Indices/GroupProperties.pm @@ -344,7 +344,7 @@ sub get_metadata_calc_gpprop_gistar { GPPROP_GISTAR_LIST => { description => 'List of 
Gi* scores', type => 'list', - is_zscore => 1, + distribution => 'zscore', }, }, ); diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index 3c937a10e..634c258b4 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -62,9 +62,10 @@ sub get_metadata_calc_richness { type => 'Lists and Counts', pre_calc => 'calc_abc', uses_nbr_lists => 1, # how many sets of neighbour lists it must have + distribution => 'nonnegative', indices => { RICHNESS_ALL => { - description => 'for both sets of neighbours' + description => 'for both sets of neighbours', }, RICHNESS_SET1 => { description => 'for neighbour set 1', @@ -108,11 +109,12 @@ sub get_metadata_calc_redundancy { uses_nbr_lists => 1, # how many sets of lists it must have reference => 'Garcillan et al. (2003) J Veget. Sci. ' . 'https://doi.org/10.1111/j.1654-1103.2003.tb02174.x', + distribution => 'nonnegative', indices => { REDUNDANCY_ALL => { - description => 'for both neighbour sets', - lumper => 1, - formula => [ + description => 'for both neighbour sets', + lumper => 1, + formula => [ '= 1 - \frac{RICHNESS\_ALL}{ABC3\_SUM\_ALL}', q{}, ], @@ -277,7 +279,8 @@ sub get_metadata_calc_kulczynski2 { KULCZYNSKI2 => { cluster => 1, description => 'Kulczynski 2 index', - } + distribution => 'unit_interval', + }, }, type => 'Taxonomic Dissimilarity and Comparison', pre_calc => [qw /calc_abc is_dissimilarity_valid/], @@ -465,6 +468,7 @@ sub get_metadata_calc_nestedness_resultant { NEST_RESULTANT => { cluster => 1, description => 'Nestedness-resultant index', + distribution => 'unit_interval', } }, ); @@ -532,18 +536,22 @@ sub get_metadata_calc_bray_curtis { cluster => 1, description => 'Bray Curtis dissimilarity', lumper => 0, + distribution => 'unit_interval', }, BC_A => { description => 'The A factor used in calculations (see formula)', lumper => 0, + distribution => 'nonnegative', }, BC_B => { description => 'The B factor used in calculations (see formula)', 
lumper => 0, + distribution => 'nonnegative', }, BC_W => { description => 'The W factor used in calculations (see formula)', lumper => 1, + distribution => 'nonnegative', }, }, ); @@ -621,18 +629,22 @@ END_BCN_DESCR BRAY_CURTIS_NORM => { cluster => 1, description => 'Bray Curtis dissimilarity normalised by groups', + distribution => 'unit_interval', }, BCN_A => { description => 'The A factor used in calculations (see formula)', lumper => 0, + distribution => 'nonnegative', }, BCN_B => { description => 'The B factor used in calculations (see formula)', lumper => 0, + distribution => 'nonnegative', }, BCN_W => { description => 'The W factor used in calculations (see formula)', lumper => 1, + distribution => 'nonnegative', }, }, @@ -694,7 +706,7 @@ sub get_metadata_calc_beta_diversity { '= \frac{A + B + C}{max((A+B), (A+C))} - 1', $self->get_formula_explanation_ABC, ], - #formula => 'ABC / max (A+B, A+C) - 1', + distribution => 'unit_interval', }, }, type => 'Taxonomic Dissimilarity and Comparison', @@ -797,18 +809,22 @@ sub get_metadata_calc_simpson_shannon { SIMPSON_D => { description => q{Simpson's D. 
A score of zero is more similar.}, formula => ['D = 1 - \sum^n_{i=1} p_i^2'], + distribution => 'unit_interval', }, SHANNON_H => { description => q{Shannon's H}, formula => ['H = - \sum^n_{i=1} (p_i \cdot ln (p_i))'], + distribution => 'nonnegative', }, SHANNON_HMAX => { description => q{maximum possible value of Shannon's H}, formula => ['HMAX = ln(richness)'], + distribution => 'nonnegative', }, SHANNON_E => { description => q{Shannon's evenness (H / HMAX)}, formula => ['Evenness = \frac{H}{HMAX}'], + distribution => 'unit_interval', }, }, ); @@ -895,9 +911,11 @@ sub get_metadata_calc_tx_rao_qe { indices => { TX_RAO_QE => { description => 'Taxonomically weighted quadratic entropy', + distribution => 'unit_interval', }, TX_RAO_TN => { description => 'Count of comparisons used to calculate TX_RAO_QE', + distribution => 'nonnegative', }, TX_RAO_TLABELS => { description => 'List of labels and values used in the TX_RAO_QE calculations', @@ -946,8 +964,12 @@ sub get_metadata_calc_mx_rao_qe { indices => { MX_RAO_QE => { description => 'Matrix weighted quadratic entropy', + distribution => 'unit_interval', + }, + MX_RAO_TN => { + description => 'Count of comparisons used to calculate MX_RAO_QE', + distribution => 'nonnegative', }, - MX_RAO_TN => {description => 'Count of comparisons used to calculate MX_RAO_QE'}, MX_RAO_TLABELS => { description => 'List of labels and values used in the MX_RAO_QE calculations', type => 'list', @@ -1071,6 +1093,7 @@ sub get_metadata_calc_local_range_stats { type => 'Lists and Counts', pre_calc => 'calc_abc2', uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { ABC2_MEAN_ALL => { description => 'Mean label range in both element sets', @@ -1203,6 +1226,7 @@ sub get_metadata_calc_local_sample_count_stats { my %metadata = ( name => 'Sample count summary stats', description => "Summary stats of the sample counts across the neighbour sets.\n", + distribution => 'nonnegative', indices => { 
ABC3_MEAN_ALL => { description => 'Mean of label sample counts across both element sets.', @@ -1353,6 +1377,7 @@ sub get_metadata_calc_abc_counts { description => "Counts of labels in neighbour sets 1 and 2.\n" . 'These form the basis for the Taxonomic Dissimilarity and Comparison indices.', type => 'Lists and Counts', + distribution => 'nonnegative', indices => { ABC_A => { description => 'Count of labels common to both neighbour sets', @@ -1428,6 +1453,7 @@ sub get_metadata_calc_d { indices => { ABC_D => { description => 'Count of labels not in either neighbour set (D score)', + bounds => [0, 'Inf'], } }, ); @@ -1445,9 +1471,10 @@ sub get_metadata_calc_elements_used { type => 'Lists and Counts', pre_calc => 'calc_abc', uses_nbr_lists => 1, # how many sets of lists it must have + distribution => 'nonnegative', indices => { EL_COUNT_SET1 => { - description => 'Count of elements in neighbour set 1', + description => 'Count of elements in neighbour set 1', lumper => 0, }, EL_COUNT_SET2 => { diff --git a/lib/Biodiverse/Indices/LabelProperties.pm b/lib/Biodiverse/Indices/LabelProperties.pm index 8dcd7d5a4..61129c7fb 100644 --- a/lib/Biodiverse/Indices/LabelProperties.pm +++ b/lib/Biodiverse/Indices/LabelProperties.pm @@ -394,7 +394,7 @@ sub get_metadata_calc_lbprop_gistar { LBPROP_GISTAR_LIST => { description => 'List of Gi* scores', type => 'list', - is_zscore => 1, + distribution => 'zscore', }, }, ); diff --git a/lib/Biodiverse/Indices/LabelPropertiesRangeWtd.pm b/lib/Biodiverse/Indices/LabelPropertiesRangeWtd.pm index f8117013b..20f1cfcb2 100644 --- a/lib/Biodiverse/Indices/LabelPropertiesRangeWtd.pm +++ b/lib/Biodiverse/Indices/LabelPropertiesRangeWtd.pm @@ -214,7 +214,7 @@ sub get_metadata_calc_lbprop_gistar_abc2 { LBPROP_GISTAR_LIST_ABC2 => { description => 'List of Gi* scores', type => 'list', - is_zscore => 1, + distribution => 'zscore', }, }, ); diff --git a/lib/Biodiverse/Indices/Matrix_Indices.pm b/lib/Biodiverse/Indices/Matrix_Indices.pm index 
586ca4637..83e73a318 100644 --- a/lib/Biodiverse/Indices/Matrix_Indices.pm +++ b/lib/Biodiverse/Indices/Matrix_Indices.pm @@ -37,9 +37,15 @@ sub get_metadata_calc_matrix_stats { indices => { MX_MEAN => {description => 'Mean'}, MX_SD => {description => 'Standard deviation'}, - MX_N => {description => 'Number of samples (matrix elements, not labels)'}, + MX_N => { + description => 'Number of samples (matrix elements, not labels)', + distribution => 'nonnegative', + }, MX_MEDIAN => {description => 'Median'}, - MX_RANGE => {description => 'Range (max-min)'}, + MX_RANGE => { + description => 'Range (max-min)', + distribution => 'nonnegative', + }, MX_MINVALUE => {description => 'Minimum value'}, MX_MAXVALUE => {description => 'Maximum value'}, MX_SKEW => {description => 'Skewness'}, diff --git a/lib/Biodiverse/Indices/Numeric_Labels.pm b/lib/Biodiverse/Indices/Numeric_Labels.pm index 1c018fbd8..d87da0299 100644 --- a/lib/Biodiverse/Indices/Numeric_Labels.pm +++ b/lib/Biodiverse/Indices/Numeric_Labels.pm @@ -80,8 +80,14 @@ sub get_metadata_calc_numeric_label_stats { indices => { NUM_SD => {description => 'Standard deviation',}, NUM_MEAN => {description => 'Mean',}, - NUM_N => {description => 'Number of samples',}, - NUM_RANGE => {description => 'Range (max - min)',}, + NUM_N => { + description => 'Number of samples', + distribution => 'nonnegative', + }, + NUM_RANGE => { + description => 'Range (max - min)', + distribution => 'nonnegative', + }, NUM_SKEW => {description => 'Skewness',}, NUM_KURT => {description => 'Kurtosis',}, NUM_CV => {description => 'Coefficient of variation (NUM_SD / NUM_MEAN)',}, @@ -299,6 +305,7 @@ sub get_metadata_calc_numeric_label_dissimilarity { pre_calc => 'calc_abc3', uses_nbr_lists => 2, # how many sets of lists it must have pre_conditions => ['labels_are_numeric'], + distribution => 'nonnegative', indices => { NUMD_ABSMEAN => { description => 'Mean absolute dissimilarity of labels in set 1 to those in set 2.', @@ -529,7 +536,7 @@ sub 
get_metadata_calc_num_labels_gistar { NUM_GISTAR => { description => 'List of Gi* scores', lumper => 1, - is_zscore => 1, + distribution => 'zscore', }, }, ); diff --git a/lib/Biodiverse/Indices/PhyloCom.pm b/lib/Biodiverse/Indices/PhyloCom.pm index 462e9de45..865321f82 100644 --- a/lib/Biodiverse/Indices/PhyloCom.pm +++ b/lib/Biodiverse/Indices/PhyloCom.pm @@ -78,6 +78,7 @@ sub get_mpd_mntd_metadata { }, PNTD_VARIANCE => { description => 'Variance of nearest taxon distances', + distribution => 'nonnegative', }, PNTD_MAX => { description => 'Maximum of nearest taxon distances', @@ -87,9 +88,11 @@ sub get_mpd_mntd_metadata { }, PNTD_RMSD => { description => 'Root mean squared nearest taxon distances', + distribution => 'nonnegative', }, PNTD_N => { description => 'Count of nearest taxon distances', + distribution => 'nonnegative', }, PMPD_MEAN => { description => 'Mean of pairwise phylogenetic distances', @@ -100,18 +103,22 @@ sub get_mpd_mntd_metadata { . "similar to Clarke and Warwick (2001; http://dx.doi.org/10.3354/meps216265)" . 
" but uses tip-to-tip distances instead of tip to most recent common ancestor.", #formula => $mpd_variance_formula, + distribution => 'nonnegative', }, PMPD_MAX => { description => 'Maximum of pairwise phylogenetic distances', + distribution => 'nonnegative', }, PMPD_MIN => { description => 'Minimum of pairwise phylogenetic distances', }, PMPD_RMSD => { description => 'Root mean squared pairwise phylogenetic distances', + distribution => 'nonnegative', }, PMPD_N => { description => 'Count of pairwise phylogenetic distances', + distribution => 'nonnegative', }, }; @@ -822,12 +829,12 @@ sub get_metadata_calc_nri_nti1 { PHYLO_NRI1 => { description => 'Net Relatedness Index, unweighted', formula => $nri_formula, - is_zscore => 1, + distribution => 'zscore', }, PHYLO_NTI1 => { description => 'Nearest Taxon Index, unweighted', formula => $nti_formula, - is_zscore => 1, + distribution => 'zscore', }, }, uses_nbr_lists => 1, @@ -871,12 +878,12 @@ sub get_metadata_calc_nri_nti2 { PHYLO_NRI2 => { description => 'Net Relatedness Index, local range weighted', formula => [], - is_zscore => 1, + distribution => 'zscore', }, PHYLO_NTI2 => { description => 'Nearest Taxon Index, local range weighted', formula => [], - is_zscore => 1, + distribution => 'zscore', }, }, uses_nbr_lists => 1, @@ -920,12 +927,12 @@ sub get_metadata_calc_nri_nti3 { PHYLO_NRI3 => { description => 'Net Relatedness Index, abundance weighted', formula => [], - is_zscore => 1, + distribution => 'zscore', }, PHYLO_NTI3 => { description => 'Nearest Taxon Index, abundance weighted', formula => [], - is_zscore => 1, + distribution => 'zscore', }, }, uses_nbr_lists => 1, @@ -984,6 +991,7 @@ sub get_metadata_calc_nri_nti_expected_values { PHYLO_NRI_SAMPLE_SD => { description => 'Expected standard deviation of pair-wise distances', formula => [], + distribution => 'nonnegative', }, PHYLO_NTI_SAMPLE_MEAN => { description => 'Expected mean of nearest taxon distances', @@ -992,10 +1000,12 @@ sub 
get_metadata_calc_nri_nti_expected_values { PHYLO_NTI_SAMPLE_SD => { description => 'Expected standard deviation of nearest taxon distances', formula => [], + distribution => 'nonnegative', }, PHYLO_NRI_NTI_SAMPLE_N => { description => 'Number of random resamples used', formula => [], + distribution => 'nonnegative', }, }; @@ -1403,7 +1413,7 @@ sub get_metadata_calc_net_vpd { PHYLO_NET_VPD => { description => 'Net variance of pair-wise phylogenetic distances, unweighted', #formula => $nri_formula, - is_zscore => 1, + distribution => 'zscore', }, }, uses_nbr_lists => 1, diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 52d7eb505..cb806619a 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -70,9 +70,9 @@ sub get_metadata_calc_pd { ], }, PD_P => { - cluster => undef, - description => 'Phylogenetic diversity as a proportion of total tree length', - formula => [ + cluster => undef, + description => 'Phylogenetic diversity as a proportion of total tree length', + formula => [ '= \frac { PD }{ \sum_{c \in C} L_c }', ' where terms are the same as for PD, but ', 'c', @@ -82,6 +82,7 @@ sub get_metadata_calc_pd { 'L_c', ' are calculated for all nodes in the tree.', ], + distribution => 'unit_interval', }, PD_per_taxon => { cluster => undef, @@ -145,8 +146,8 @@ sub get_metadata_calc_pd_local { ], }, PD_LOCAL_P => { - description => 'Phylogenetic diversity as a proportion of total tree length', - formula => [ + description => 'Phylogenetic diversity as a proportion of total tree length', + formula => [ '= \frac { PD }{ \sum_{c \in C} L_c }', ' where terms are the same as for PD, but ', 'c', @@ -156,6 +157,7 @@ sub get_metadata_calc_pd_local { 'L_c', ' are calculated for all nodes in the tree.', ], + distribution => 'unit_interval', }, }, ); @@ -397,7 +399,8 @@ sub get_metadata_calc_pd_terminal_node_count { uses_nbr_lists => 1, # how many lists it must have indices => { 
PD_INCLUDED_TERMINAL_NODE_COUNT => { - description => 'Count of tree terminal nodes included in the PD calculations', + description => 'Count of tree terminal nodes included in the PD calculations', + distribution => 'nonnegative', }, }, ); @@ -710,7 +713,8 @@ sub get_metadata_calc_pe { }, PE_WE_P => { description => 'Phylogenetic weighted endemism as a proportion of the total tree length', - formula => ['PE\_WE / L', ' where L is the sum of all branch lengths in the trimmed tree'], + formula => [ 'PE\_WE / L', ' where L is the sum of all branch lengths in the trimmed tree' ], + distribution => 'unit_interval', }, }, ); @@ -737,6 +741,7 @@ sub get_metadata_calc_pe_lists { type => 'Phylogenetic Endemism Indices', pre_calc => ['_calc_pe'], uses_nbr_lists => 1, + distribution => 'nonnegative', indices => { PE_WTLIST => { description => 'Node weights used in PE calculations', @@ -801,7 +806,8 @@ END_PEC_DESC description => 'Phylogenetic endemism, central variant' }, PEC_WE_P => { - description => 'Phylogenetic weighted endemism as a proportion of the total tree length, central variant' + description => 'Phylogenetic weighted endemism as a proportion of the total tree length, central variant', + distribution => 'unit_interval', }, }, ); @@ -850,6 +856,7 @@ END_PEC_DESC type => 'Phylogenetic Endemism Indices', pre_calc => [qw /_calc_pe _calc_phylo_abc_lists/], uses_nbr_lists => 1, # how many lists it must have + distribution => 'nonnegative', indices => { PEC_WTLIST => { description => 'Phylogenetic endemism weights, central variant', @@ -917,6 +924,7 @@ sub get_metadata_calc_pe_central_cwe { indices => { PEC_CWE => { description => 'Corrected weighted phylogenetic endemism, central variant', + distribution => 'unit_interval', }, PEC_CWE_PD => { description => 'PD used in the PEC_CWE index.', @@ -1070,6 +1078,7 @@ sub get_metadata_calc_pe_clade_contributions { PE_CLADE_CONTR_P => { description => 'List of node (clade) contributions to the PE calculation, proportional to 
the entire tree', type => 'list', + distribution => 'unit_interval', }, }, ); @@ -1158,6 +1167,7 @@ sub get_metadata_calc_pe_clade_loss { PE_CLADE_LOSS_CONTR_P => { description => 'As per PE_CLADE_LOSS but proportional to the entire tree', type => 'list', + distribution => 'unit_interval', }, }, ); @@ -1251,8 +1261,9 @@ sub get_metadata_calc_pd_clade_loss_ancestral { }, PD_CLADE_LOSS_ANC_P => { description => 'List of the proportion of the clade\'s PD loss ' - . 'that is due to the ancestral branches.', + . 'that is due to the ancestral branches.', type => 'list', + distribution => 'unit_interval', }, }, ); @@ -1291,8 +1302,9 @@ sub get_metadata_calc_pe_clade_loss_ancestral { }, PE_CLADE_LOSS_ANC_P => { description => 'List of the proportion of the clade\'s PE loss ' - . 'that is due to the ancestral branches.', + . 'that is due to the ancestral branches.', type => 'list', + distribution => 'unit_interval', }, }, ); @@ -1377,8 +1389,9 @@ EOD }, PE_WE_SINGLE_P => { description => "Phylogenetic endemism unweighted by the number of neighbours as a proportion of the total tree length.\n" - . "Counts each label only once, regardless of how many groups in the neighbourhood it is found.\n" - . "Useful if your data have sampling biases." + . "Counts each label only once, regardless of how many groups in the neighbourhood it is found.\n" + . 
"Useful if your data have sampling biases.", + distribution => 'unit_interval', }, }, ); @@ -1436,6 +1449,7 @@ sub get_metadata_calc_pd_endemism { }, PD_ENDEMISM_P => { description => 'Phylogenetic Diversity Endemism, as a proportion of the whole tree', + distribution => 'unit_interval', }, #PD_ENDEMISM_R => { # should put in its own calc as it needs an extra dependency # description => 'Phylogenetic Diversity Endemism, as a proportion of the local PD', @@ -1512,6 +1526,7 @@ sub get_metadata_calc_count_labels_on_tree { indices => { PHYLO_LABELS_ON_TREE_COUNT => { description => 'The number of labels that are found on the tree, across both neighbour sets', + distribution => 'nonnegative', }, }, type => 'Phylogenetic Indices', # keeps it clear of the other indices in the GUI @@ -1581,11 +1596,11 @@ sub get_metadata_calc_labels_not_on_tree { }, # should poss also do nbr sets 1 and 2 PHYLO_LABELS_NOT_ON_TREE_N => { description => 'Number of labels not on the tree', - + distribution => 'nonnegative', }, PHYLO_LABELS_NOT_ON_TREE_P => { description => 'Proportion of labels not on the tree', - + distribution => 'unit_interval', }, }, type => 'Phylogenetic Indices', # keeps it clear of the other indices in the GUI @@ -2341,7 +2356,7 @@ sub get_metadata_calc_phylo_sorenson { indices => { PHYLO_SORENSON => { cluster => 'NO_CACHE_ABC', - bounds => [0, 1], + bounds => [0,1], formula => [ '1 - (2A / (2A + B + C))', ' where A is the length of shared branches, ' @@ -2387,7 +2402,7 @@ sub get_metadata_calc_phylo_jaccard { indices => { PHYLO_JACCARD => { cluster => 'NO_CACHE_ABC', - bounds => [0, 1], + bounds => [0,1], formula => [ '= 1 - (A / (A + B + C))', ' where A is the length of shared branches, ' @@ -2438,7 +2453,7 @@ sub get_metadata_calc_phylo_s2 { . 
'only in neighbour sets 1 and 2', ], description => 'Phylo S2 score', - bounds => [0, 1], + distribution => 'unit_interval', # min (B,C) in denominator means cluster order # influences tie breaker results as different # assemblages are merged @@ -2480,19 +2495,19 @@ sub get_metadata_calc_phylo_abc { uses_nbr_lists => 2, # how many sets of lists it must have indices => { PHYLO_A => { - description => 'Length of branches shared by labels in nbr sets 1 and 2', + description => 'Sum of branch lengths shared by labels in nbr sets 1 and 2', lumper => 1, }, PHYLO_B => { - description => 'Length of branches unique to labels in nbr set 1', + description => 'Sum of branch lengths unique to labels in nbr set 1', lumper => 0, }, PHYLO_C => { - description => 'Length of branches unique to labels in nbr set 2', + description => 'Sum of branch lengths unique to labels in nbr set 2', lumper => 0, }, PHYLO_ABC => { - description => 'Length of all branches associated with labels in nbr sets 1 and 2', + description => 'Sum of branch lengths associated with labels in nbr sets 1 and 2', lumper => 1, }, }, @@ -2687,9 +2702,10 @@ sub get_metadata_calc_phylo_corrected_weighted_endemism{ reference => '', indices => { PE_CWE => { - description => $descr, - reference => '', - formula => ['PE\_WE / PD'], + description => $descr, + reference => '', + formula => [ 'PE\_WE / PD' ], + distribution => 'unit_interval', }, }, ); @@ -2727,9 +2743,10 @@ sub get_metadata_calc_phylo_corrected_weighted_rarity { reference => '', indices => { PHYLO_RARITY_CWR => { - description => $descr, - reference => '', - formula => ['AED_T / PD'], + description => $descr, + reference => '', + formula => [ 'AED_T / PD' ], + distribution => 'unit_interval', }, }, ); @@ -3076,6 +3093,7 @@ sub get_metadata_calc_phylo_abundance { pre_calc => [qw /_calc_pd calc_abc3 calc_labels_on_tree/], pre_calc_global => [qw /get_trimmed_tree get_global_node_abundance_hash/], uses_nbr_lists => 1, # how many lists it must have + 
distribution => 'nonnegative', indices => { PHYLO_ABUNDANCE => { cluster => undef, diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 767356f84..5dd22c99c 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -34,7 +34,7 @@ sub get_metadata_calc_phylo_rpd1 { indices => { PHYLO_RPD1 => { description => 'RPD1', - is_ratio => 1, + distribution => 'nonnegative_ratio', }, PHYLO_RPD_NULL1 => { description => 'Null model score used as the denominator in the RPD1 calculations', @@ -42,7 +42,7 @@ sub get_metadata_calc_phylo_rpd1 { PHYLO_RPD_DIFF1 => { description => 'How much more or less PD is there than expected, in original tree units.', formula => ['= tree\_length \times (PD\_P - PHYLO\_RPD\_NULL1)'], - is_divergent => 1, + distribution => 'divergent', } }, ); @@ -101,7 +101,7 @@ sub get_metadata_calc_phylo_rpe1 { indices => { PHYLO_RPE1 => { description => 'Relative Phylogenetic Endemism score', - is_ratio => 1, + distribution => 'nonnegative_ratio', }, PHYLO_RPE_NULL1 => { description => 'Null score used as the denominator in the RPE calculations', @@ -109,7 +109,7 @@ sub get_metadata_calc_phylo_rpe1 { PHYLO_RPE_DIFF1 => { description => 'How much more or less PE is there than expected, in original tree units.', formula => ['= tree\_length \times (PE\_WE\_P - PHYLO\_RPE\_NULL1)'], - is_divergent => 1, + distribution => 'divergent', } }, ); @@ -170,7 +170,7 @@ sub get_metadata_calc_phylo_rpd2 { indices => { PHYLO_RPD2 => { description => 'RPD2', - is_ratio => 1, + distribution => 'nonnegative_ratio', }, PHYLO_RPD_NULL2 => { description => 'Null model score used as the denominator in the RPD2 calculations', @@ -178,7 +178,7 @@ sub get_metadata_calc_phylo_rpd2 { PHYLO_RPD_DIFF2 => { description => 'How much more or less PD is there than expected, in original tree units.', formula => ['= tree\_length \times (PD\_P - PHYLO\_RPD\_NULL2)'], - 
is_divergent => 1, + distribution => 'divergent', } }, ); @@ -247,7 +247,7 @@ sub get_metadata_calc_phylo_rpe_central { indices => { PHYLO_RPEC => { description => 'Relative Phylogenetic Endemism score, central', - is_ratio => 1, + distribution => 'nonnegative_ratio', }, PHYLO_RPE_NULLC => { description => 'Null score used as the denominator in the PHYLO_RPEC calculations', @@ -255,7 +255,7 @@ sub get_metadata_calc_phylo_rpe_central { PHYLO_RPE_DIFFC => { description => 'How much more or less PE is there than expected, in original tree units.', formula => ['= tree\_length \times (PE\_WEC\_P - PHYLO\_RPE\_NULLC)'], - is_divergent => 1, + distribution => 'divergent', } }, ); @@ -307,7 +307,7 @@ sub get_metadata_calc_phylo_rpe2 { indices => { PHYLO_RPE2 => { description => 'Relative Phylogenetic Endemism score, type 2', - is_ratio => 1, + distribution => 'nonnegative_ratio', }, PHYLO_RPE_NULL2 => { description => 'Null score used as the denominator in the RPE2 calculations', @@ -315,7 +315,7 @@ sub get_metadata_calc_phylo_rpe2 { PHYLO_RPE_DIFF2 => { description => 'How much more or less PE is there than expected, in original tree units.', formula => ['= tree\_length \times (PE\_WE\_P - PHYLO\_RPE\_NULL2)'], - is_divergent => 1, + distribution => 'divergent', } }, ); diff --git a/lib/Biodiverse/Indices/Rarity.pm b/lib/Biodiverse/Indices/Rarity.pm index b70189cec..da84332d6 100644 --- a/lib/Biodiverse/Indices/Rarity.pm +++ b/lib/Biodiverse/Indices/Rarity.pm @@ -59,11 +59,12 @@ sub get_metadata_calc_rarity_central { uses_nbr_lists => 1, # how many sets of lists it must have indices => { RAREC_CWE => { - description => 'Corrected weighted rarity', - lumper => 0, - formula => [ + description => 'Corrected weighted rarity', + lumper => 0, + formula => [ '= \frac{RAREC\_WE}{RAREC\_RICHNESS}', ], + distribution => 'unit_interval', }, RAREC_WE => { description => 'Weighted rarity', diff --git a/lib/Biodiverse/Metadata/Indices.pm b/lib/Biodiverse/Metadata/Indices.pm index 
102f85a74..ab94356ad 100644 --- a/lib/Biodiverse/Metadata/Indices.pm +++ b/lib/Biodiverse/Metadata/Indices.pm @@ -21,6 +21,22 @@ Readonly my %methods_and_defaults => ( formula => undef, ); +sub new { + my ($class, $data) = @_; + $data //= {}; + + my $self = __PACKAGE__->SUPER::new ($data); + bless $self, $class; + + my $indices = $self->{indices} // {}; + foreach my $index (keys %{$indices}) { + # triggers it being set + $self->get_index_bounds ($index); + } + + return $self; +} + sub _get_method_default_hash { return wantarray ? %methods_and_defaults : {%methods_and_defaults}; @@ -118,6 +134,23 @@ sub get_index_formula { return $formula; } +sub get_index_bounds { + my ($self, $index) = @_; + + no autovivification; + my $idx_hash = $self->{indices}{$index}; + croak "No index $index" if !$idx_hash; + + my $bounds + = $self->{indices}{$index}{bounds} + //= $self->get_index_is_unit_interval($index) ? [0,1] + : $self->get_index_is_nonnegative($index) ? [0,'Inf'] + : $self->get_index_is_categorical($index) ? 
[] + : ['-Inf','Inf']; + + return $bounds; +} + sub get_index_reference { my ($self, $index) = @_; @@ -160,36 +193,86 @@ sub get_index_is_list { my ($self, $index) = @_; no autovivification; - + my $indices = $self->get_indices; + return ($indices->{$index}{type} // '') eq 'list'; } -sub get_index_is_zscore { - my ($self, $index) = @_; - no autovivification; +my %valid_distributions = ( + '' => 1, + sequential => 1, + unit_interval => 1, + zscore => 1, + divergent => 1, + categorical => 1, + nonnegative => 1, + nonnegative_ratio => 1, +); - my $indices = $self->get_indices; - return $indices->{$index}{is_zscore}; +sub index_distribution_is_valid { + my ($self, $index) = @_; + my $distr = $self->get_index_distribution($index); + return $valid_distributions{$distr}; } sub get_index_is_ratio { my ($self, $index) = @_; + return $self->get_index_distribution($index) =~ /ratio$/; +} - no autovivification; +sub get_index_is_nonnegative { + my ($self, $index) = @_; - my $indices = $self->get_indices; - return $indices->{$index}{is_ratio}; + return 0 if $self->get_index_is_zscore($index); + return 1 if $self->get_index_is_unit_interval ($index); + + return $self->get_index_distribution($index) =~ /^nonnegative/; } -sub get_index_is_divergent { +# default is sequential +sub get_index_is_sequential { my ($self, $index) = @_; + return $self->get_index_distribution($index) eq 'sequential'; +} - no autovivification; +sub get_index_distribution { + my ($self, $index) = @_; + no autovivification; my $indices = $self->get_indices; - return $indices->{$index}{is_divergent}; + return $indices->{$index}{distribution} // $self->{distribution} // 'sequential'; +} + +__PACKAGE__->_make_distribution_methods (keys %valid_distributions); + +sub _make_distribution_methods { + my ($pkg, @methods) = @_; + # print "Calling _make_distribution_methods for $pkg"; + no strict 'refs'; + # filter blanks + foreach my $key (grep {$_} @methods) { + my $method = "get_index_is_$key"; + next if 
$pkg->can($method); # do not override + # say STDERR "Building $method in package $pkg"; + *{"${pkg}::${method}"} = + do { + sub { + my ($self, $index) = @_; + return $self->get_index_distribution($index) eq $key; + }; + }; + } + + return; +} + + +sub TO_JSON { + my ($self) = @_; + my $ref = {%$self}; # a crude unbless + $ref; } 1; diff --git a/t/23-Indices.t b/t/23-Indices.t index e39df5207..8897c7dfa 100644 --- a/t/23-Indices.t +++ b/t/23-Indices.t @@ -221,6 +221,33 @@ sub test_general { } +sub test_index_distribution { + my $indices_object = eval {Biodiverse::Indices->new(BASEDATA_REF => $bd)}; + my $indices = $indices_object->get_indices; + + INDEX: + foreach my $index (sort keys %$indices) { + ok $indices_object->index_distribution_is_valid (index => $index), + "Valid distribution keyword for $index"; + } +} + +sub test_index_bounds { + my $indices_object = eval {Biodiverse::Indices->new(BASEDATA_REF => $bd)}; + + use Regexp::Common; + my $RE_bound = qr/^(?:$RE{num}{real}|[+-]?Inf)$/; + + my $indices = $indices_object->get_indices; + + foreach my $index (sort keys %$indices) { + my $bounds = $indices_object->get_index_bounds (index => $index); + like $bounds, + [$RE_bound, $RE_bound], + "Bounds for scalar index $index match expected pattern"; + } +} + sub test_metadata { my $indices = eval {Biodiverse::Indices->new(BASEDATA_REF => $bd)}; #my %calculations = eval {$indices->get_calculations_as_flat_hash}; diff --git a/t/26-Cluster.t b/t/26-Cluster.t index ad59e41eb..0dc996172 100644 --- a/t/26-Cluster.t +++ b/t/26-Cluster.t @@ -575,7 +575,6 @@ sub check_matrices_differ { } -1; __DATA__ diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t index bcfb47c7a..22b52830e 100644 --- a/t/26-Cluster2.t +++ b/t/26-Cluster2.t @@ -215,7 +215,6 @@ sub test_rw_turnover_mx { is ($stats, \%expected, 'got expected stats for rw_turnover mx'); } -1; __DATA__ diff --git a/t/26-RegionGrower.t b/t/26-RegionGrower.t index 2c3b54c7e..ea9c1fae2 100644 --- a/t/26-RegionGrower.t +++ 
b/t/26-RegionGrower.t @@ -293,7 +293,6 @@ sub get_site_data_newick_tree { -1; __DATA__ diff --git a/t/28-Randomisation.t b/t/28-Randomisation.t index 3d4ae38a1..e45d1e9fd 100644 --- a/t/28-Randomisation.t +++ b/t/28-Randomisation.t @@ -1982,7 +1982,6 @@ sub print_randomisation_result_set_to_fh { } -1; __DATA__ diff --git a/t/28-Randomisation2.t b/t/28-Randomisation2.t index 3346d1870..0277528e0 100644 --- a/t/28-Randomisation2.t +++ b/t/28-Randomisation2.t @@ -444,6 +444,3 @@ sub test_checkpoint_cwd_check { note "Current wd is now " . getcwd(); } } - - -1; diff --git a/t/31-Remap.t b/t/31-Remap.t index ca86225a3..cca5333c2 100644 --- a/t/31-Remap.t +++ b/t/31-Remap.t @@ -86,5 +86,3 @@ sub test_remapped_element_names { is ($remapped, $expected, $msg); } } - -done_testing(); diff --git a/t/31-RemapGuesser.t b/t/31-RemapGuesser.t index e92f0b295..5be95447e 100755 --- a/t/31-RemapGuesser.t +++ b/t/31-RemapGuesser.t @@ -466,5 +466,3 @@ sub test_max_distance_ambiguous { 'got expected ambiguous matches for min distance 2' ); } - -done_testing();