diff --git a/src/main/java/org/ohdsi/webapi/cohortresults/DataCompletenessAttr.java b/src/main/java/org/ohdsi/webapi/cohortresults/DataCompletenessAttr.java
new file mode 100644
index 0000000000..4e85686ee7
--- /dev/null
+++ b/src/main/java/org/ohdsi/webapi/cohortresults/DataCompletenessAttr.java
@@ -0,0 +1,70 @@
+package org.ohdsi.webapi.cohortresults;
+
+
+/**
+ * Completeness percentages of gender, race and ethnicity data for one age band of a cohort.
+ */
+public class DataCompletenessAttr {
+    private String covariance;
+    private float genderP;
+    private float raceP;
+    private float ethP;
+
+    /**
+     * @return the label of the age band this entry describes, e.g. {@code "0~10"}
+     */
+    public String getCovariance() {
+        return covariance;
+    }
+
+    /**
+     * @param covariance the age-band label to set
+     */
+    public void setCovariance(String covariance) {
+        this.covariance = covariance;
+    }
+
+    /**
+     * @return the gender completeness percentage of the age band
+     */
+    public float getGenderP() {
+        return genderP;
+    }
+
+    /**
+     * @param genderP the gender completeness percentage to set
+     */
+    public void setGenderP(float genderP) {
+        this.genderP = genderP;
+    }
+
+    /**
+     * @return the race completeness percentage of the age band
+     */
+    public float getRaceP() {
+        return raceP;
+    }
+
+    /**
+     * @param raceP the race completeness percentage to set
+     */
+    public void setRaceP(float raceP) {
+        this.raceP = raceP;
+    }
+
+    /**
+     * @return the ethnicity completeness percentage of the age band
+     */
+    public float getEthP() {
+        return ethP;
+    }
+
+    /**
+     * @param ethP the ethnicity completeness percentage to set
+     */
+    public void setEthP(float ethP) {
+        this.ethP = ethP;
+    }
+
+
+}
diff --git a/src/main/java/org/ohdsi/webapi/cohortresults/EntropyAttr.java b/src/main/java/org/ohdsi/webapi/cohortresults/EntropyAttr.java
new file mode 100644
index 0000000000..286c6571e7
--- /dev/null
+++ b/src/main/java/org/ohdsi/webapi/cohortresults/EntropyAttr.java
@@ -0,0 +1,56 @@
+package org.ohdsi.webapi.cohortresults;
+
+/**
+ * One entropy value of a cohort together with the date it belongs to.
+ */
+public class EntropyAttr {
+
+    private String date;
+
+    private float entropy;
+
+    private String insitution;
+
+    /**
+     * @return the date of this entropy value, as the string stored with the analysis row
+     */
+    public String getDate() {
+        return date;
+    }
+
+    /**
+     * @param date the date to set
+     */
+    public void setDate(String date) {
+        this.date = date;
+    }
+
+    /**
+     * @return the entropy value
+     */
+    public float getEntropy() {
+        return entropy;
+    }
+
+    /**
+     * @param entropy the entropy value to set
+     */
+    public void setEntropy(float entropy) {
+        this.entropy = entropy;
+    }
+
+    /**
+     * @return the institution (the property keeps its original "insitution" spelling)
+     */
+    public String getInsitution() {
+        return insitution;
+    }
+
+    /**
+     * @param insitution the institution to set
+     */
+    public void setInsitution(String insitution) {
+        this.insitution = insitution;
+    }
+
+}
diff --git a/src/main/java/org/ohdsi/webapi/cohortresults/mapper/AnalysisResultsMapper.java b/src/main/java/org/ohdsi/webapi/cohortresults/mapper/AnalysisResultsMapper.java
new file mode 100644
index 0000000000..42e37cd618
--- /dev/null
+++ b/src/main/java/org/ohdsi/webapi/cohortresults/mapper/AnalysisResultsMapper.java
@@ -0,0 +1,33 @@
+package org.ohdsi.webapi.cohortresults.mapper;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.ohdsi.webapi.model.results.AnalysisResults;
+import org.springframework.jdbc.core.RowMapper;
+
+
+/**
+ * Maps a row with ANALYSIS_ID, COHORT_DEFINITION_ID, STRATUM_1..5 and count_value columns to {@link AnalysisResults}.
+ */
+public class AnalysisResultsMapper implements RowMapper<AnalysisResults> {
+
+    /**
+     * Builds an {@link AnalysisResults} from the current row of {@code rs}; {@code rowNum} is not used.
+     */
+    @Override
+    public AnalysisResults mapRow(ResultSet rs, int rowNum) throws SQLException {
+        AnalysisResults ar = new AnalysisResults();
+        ar.setAnalysisId(rs.getInt("ANALYSIS_ID"));
+        ar.setCohortDefinitionId(rs.getInt("COHORT_DEFINITION_ID"));
+        ar.setStratum1(rs.getString("STRATUM_1"));
+        ar.setStratum2(rs.getString("STRATUM_2"));
+        ar.setStratum3(rs.getString("STRATUM_3"));
+        ar.setStratum4(rs.getString("STRATUM_4"));
+        ar.setStratum5(rs.getString("STRATUM_5"));
+        ar.setCountValue(rs.getInt("count_value"));
+
+        return ar;
+    }
+
+}
diff --git a/src/main/java/org/ohdsi/webapi/service/CohortResultsService.java b/src/main/java/org/ohdsi/webapi/service/CohortResultsService.java
index 4fd1c01d30..823e872ee8 100644
--- a/src/main/java/org/ohdsi/webapi/service/CohortResultsService.java
+++ 
b/src/main/java/org/ohdsi/webapi/service/CohortResultsService.java
@@ -4,6 +4,7 @@
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -44,12 +45,16 @@
 import org.ohdsi.webapi.cohortresults.CohortSpecificSummary;
 import org.ohdsi.webapi.cohortresults.CohortSpecificTreemap;
 import org.ohdsi.webapi.cohortresults.CohortVisitsDrilldown;
+import org.ohdsi.webapi.cohortresults.DataCompletenessAttr;
+import org.ohdsi.webapi.cohortresults.EntropyAttr;
 import org.ohdsi.webapi.cohortresults.HierarchicalConceptRecord;
 import org.ohdsi.webapi.cohortresults.ScatterplotRecord;
 import org.ohdsi.webapi.cohortresults.VisualizationData;
 import org.ohdsi.webapi.cohortresults.VisualizationDataRepository;
+import org.ohdsi.webapi.cohortresults.mapper.AnalysisResultsMapper;
 import org.ohdsi.webapi.helper.ResourceHelper;
 import org.ohdsi.webapi.model.results.Analysis;
+import org.ohdsi.webapi.model.results.AnalysisResults;
 import org.ohdsi.webapi.source.Source;
 import org.ohdsi.webapi.source.SourceDaimon;
 import org.ohdsi.webapi.util.SessionUtils;
@@ -1595,7 +1600,137 @@ public List getHeraclesHeel(@PathParam("id") final int id,
 
         return attrs;
     }
+
+    /**
+     * Percentage reported when an analysis row or its value is missing; the Heracles
+     * data-completeness SQL writes the same sentinel for an empty age band.
+     */
+    private static final float DATA_COMPLETENESS_UNAVAILABLE = -999f;
+
+    /**
+     * Age-band labels in analysis-id order: band {@code i} is backed by analyses
+     * {@code 2001 + i} (gender), {@code 2011 + i} (race) and {@code 2021 + i} (ethnicity).
+     */
+    private static final String[] DATA_COMPLETENESS_AGE_BANDS =
+            {"0~10", "10~20", "20~30", "30~40", "40~50", "50~60", "60+"};
+
+    /**
+     * Renders a cohort-results SQL resource for one cohort and maps every returned row.
+     *
+     * @param sqlResource classpath location of the SQL template
+     * @param id cohort definition id rendered into the template
+     * @param sourceKey key of the source whose results tables are queried
+     * @return the mapped rows
+     */
+    private List<AnalysisResults> queryAnalysisResults(final String sqlResource, final int id,
+            final String sourceKey) {
+        Source source = getSourceRepository().findBySourceKey(sourceKey);
+        String resultsTableQualifier = source.getTableQualifier(SourceDaimon.DaimonType.Results);
+
+        String sql = ResourceHelper.GetResourceAsString(sqlResource);
+        sql = SqlRender.renderSql(sql, new String[]{"tableQualifier", "cohortDefinitionId"},
+                new String[]{resultsTableQualifier, String.valueOf(id)});
+        sql = SqlTranslate.translateSql(sql, getSourceDialect(), source.getSourceDialect());
+
+        return getSourceJdbcTemplate(source).query(sql, new AnalysisResultsMapper());
+    }
+
+    /**
+     * Reads the percentage stored in {@code stratum_1} of a data-completeness row.
+     *
+     * @param result the analysis row, or {@code null} when the analysis produced none
+     * @return the parsed percentage, or {@code -999} when the row or its value is absent
+     * @throws NumberFormatException if the stored value is not numeric
+     */
+    private static float completenessPercentage(final AnalysisResults result) {
+        if (result == null || result.getStratum1() == null) {
+            return DATA_COMPLETENESS_UNAVAILABLE;
+        }
+        return Float.parseFloat(result.getStratum1());
+    }
+
+    /**
+     * Loads the data-completeness analysis rows of a cohort.
+     *
+     * @param id cohort definition id
+     * @param sourceKey key of the source to query
+     * @return one entry per stored analysis row
+     */
+    public List<AnalysisResults> getCohortAnalysesForDataCompleteness(final int id, String sourceKey) {
+        return queryAnalysisResults(
+                "/resources/cohortresults/sql/datacompleteness/getCohortDataCompleteness.sql", id, sourceKey);
+    }
 
+    /**
+     * Returns gender, race and ethnicity completeness percentages per age band.
+     *
+     * @param id cohort definition id
+     * @param sourceKey key of the source to query
+     * @return one entry per age band, youngest first; a percentage is {@code -999} when
+     *         its analysis has no stored value
+     */
+    @GET
+    @Path("{sourceKey}/{id}/datacompleteness")
+    @Produces(MediaType.APPLICATION_JSON)
+    public List<DataCompletenessAttr> getDataCompleteness(@PathParam("id") final int id,
+            @PathParam("sourceKey") String sourceKey) {
+        Map<Integer, AnalysisResults> resultMap = new HashMap<Integer, AnalysisResults>();
+        for (AnalysisResults ar : this.getCohortAnalysesForDataCompleteness(id, sourceKey)) {
+            resultMap.put(ar.getAnalysisId(), ar);
+        }
+
+        List<DataCompletenessAttr> dcal = new ArrayList<DataCompletenessAttr>();
+        for (int band = 0; band < DATA_COMPLETENESS_AGE_BANDS.length; band++) {
+            DataCompletenessAttr aca = new DataCompletenessAttr();
+            aca.setCovariance(DATA_COMPLETENESS_AGE_BANDS[band]);
+            aca.setGenderP(completenessPercentage(resultMap.get(2001 + band)));
+            aca.setRaceP(completenessPercentage(resultMap.get(2011 + band)));
+            aca.setEthP(completenessPercentage(resultMap.get(2021 + band)));
+            dcal.add(aca);
+        }
+
+        return dcal;
+    }
+
+    /**
+     * Loads the entropy analysis rows of a cohort.
+     *
+     * @param id cohort definition id
+     * @param sourceKey key of the source to query
+     * @return one entry per stored analysis row
+     */
+    public List<AnalysisResults> getCohortAnalysesEntropy(final int id, String sourceKey) {
+        return queryAnalysisResults("/resources/cohortresults/sql/entropy/getEntropy.sql", id, sourceKey);
+    }
+
+    /**
+     * Returns the entropy series of a cohort.
+     *
+     * @param id cohort definition id
+     * @param sourceKey key of the source to query
+     * @return one entry per analysis row that has an entropy value, with the date taken
+     *         from {@code stratum_1} and the entropy from {@code stratum_2}
+     */
+    @GET
+    @Path("{sourceKey}/{id}/entropy")
+    @Produces(MediaType.APPLICATION_JSON)
+    public List<EntropyAttr> getEntropy(@PathParam("id") final int id,
+            @PathParam("sourceKey") String sourceKey) {
+        List<EntropyAttr> el = new ArrayList<EntropyAttr>();
+        for (AnalysisResults ar : this.getCohortAnalysesEntropy(id, sourceKey)) {
+            if (ar.getStratum2() == null) {
+                // No entropy stored for this row; Float.parseFloat(null) would throw.
+                continue;
+            }
+            EntropyAttr ea = new EntropyAttr();
+            ea.setDate(ar.getStratum1());
+            ea.setEntropy(Float.parseFloat(ar.getStratum2()));
+            el.add(ea);
+        }
+
+        return el;
+    }
+
     private String JoinArray(final String[] array) {
         String result = "";
 
diff --git a/src/main/resources/resources/cohortanalysis/sql/runHeraclesAnalyses.sql 
b/src/main/resources/resources/cohortanalysis/sql/runHeraclesAnalyses.sql
index 0ad8f718ef..dbb85a0d37 100644
--- a/src/main/resources/resources/cohortanalysis/sql/runHeraclesAnalyses.sql
+++ b/src/main/resources/resources/cohortanalysis/sql/runHeraclesAnalyses.sql
@@ -70,7 +70,10 @@
 --cohort: '1700,1701'
 --cohort-specific analyses: '1800,1801,1802,1803,1804,1805,1806,1807,1808,1809,1810,1811,1812,1813,1814,1815,1816,1817,1818,1819,1820,1821,1830,1831,1840,1841,1850,1851,1860,1861,1870,1871'
 --measurement: 1300,1301,1302,1303,1304,1305,1306,1307,1308,1309,1310,1311,1312,1313,1314,1315,1316,1317,1318,1319,1320
-
+--data completeness: gender: 2001, 2002, 2003, 2004, 2005, 2006, 2007
+--data completeness: race: 2011, 2012, 2013, 2014, 2015, 2016, 2017
+--data completeness: ethnicity: 2021, 2022, 2023, 2024, 2025, 2026, 2027
+--entropy: 2031
 delete from @results_schema.HERACLES_results where cohort_definition_id IN (@cohort_definition_id) and analysis_id IN (@list_of_analysis_ids);
 delete from @results_schema.HERACLES_results_dist where cohort_definition_id IN (@cohort_definition_id) and analysis_id IN (@list_of_analysis_ids);
 
@@ -5729,6 +5732,819 @@ group by c1.cohort_definition_id,
 ;
 --}
 
+--{2001 IN (@list_of_analysis_ids)}?{
+-- 2001 Count and percentage of gender data completeness for age less than 10 (stratum_1 = percentage, stratum_2 = persons in the age band, stratum_3 = persons with valid data)
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2001 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999 -- sentinel: nobody in this age band
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count -- 100.0 keeps the division decimal
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -10, getdate()))
+    -- Age is approximated from YEAR_OF_BIRTH against the current year
+    -- (here: born after current year - 10).
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -10, getdate()))
+    -- Same age-band filter as valid_data; only the concept filter
+    -- is left out, so this counts every cohort person in the band.
+  ) all_data) innerT;
+--}
+
+--{2002 IN (@list_of_analysis_ids)}?{
+-- 2002 Count and percentage of gender data completeness for age between 10~20
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2002 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -20, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -10, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -20, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -10, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2003 IN (@list_of_analysis_ids)}?{
+-- 2003 Count and percentage of gender data completeness for age between 20~30
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2003 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -30, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -20, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -30, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -20, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2004 IN (@list_of_analysis_ids)}?{
+-- 2004 Count and percentage of gender data completeness for age between 30~40
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2004 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -40, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -30, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -40, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -30, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2005 IN (@list_of_analysis_ids)}?{
+-- 2005 Count and percentage of gender data completeness for age between 40~50
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2005 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -50, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -40, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -50, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -40, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2006 IN (@list_of_analysis_ids)}?{
+-- 2006 Count and percentage of gender data completeness for age between 50~60
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2006 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -60, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -50, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -60, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -50, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2007 IN (@list_of_analysis_ids)}?{
+-- 2007 Count and percentage of gender data completeness for age 60+
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2007 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -60, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.GENDER_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.gender_concept_id IS NOT NULL
+    and (lower(c.CONCEPT_NAME) = 'female'
+      or lower(c.CONCEPT_NAME) = 'male' )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -60, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2011 IN (@list_of_analysis_ids)}?{
+-- 2011 Count and percentage of race data completeness for age less than 10
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2011 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -10, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -10, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2012 IN (@list_of_analysis_ids)}?{
+-- 2012 Count and percentage of race data completeness for age between 10~20
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2012 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -20, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -10, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -20, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -10, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2013 IN (@list_of_analysis_ids)}?{
+-- 2013 Count and percentage of race data completeness for age between 20~30
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2013 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -30, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -20, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -30, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -20, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2014 IN (@list_of_analysis_ids)}?{
+-- 2014 Count and percentage of race data completeness for age between 30~40
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2014 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -40, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -30, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -40, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -30, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2015 IN (@list_of_analysis_ids)}?{
+-- 2015 Count and percentage of race data completeness for age between 40~50
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2015 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -50, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -40, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -50, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -40, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2016 IN (@list_of_analysis_ids)}?{
+-- 2016 Count and percentage of race data completeness for age between 50~60
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2016 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -60, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -50, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -60, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -50, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2017 IN (@list_of_analysis_ids)}?{
+-- 2017 Count and percentage of race data completeness for age 60+
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2017 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -60, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.RACE_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.RACE_CONCEPT_ID IS NOT NULL
+    and (lower(c.CONCEPT_NAME) not like 'other%'
+      and lower(c.CONCEPT_NAME) not like 'non%'
+      and lower(c.CONCEPT_NAME) not like '%not %'
+      and lower(c.CONCEPT_NAME) not like 'unknown'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -60, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2021 IN (@list_of_analysis_ids)}?{
+-- 2021 Count and percentage of ethnicity data completeness for age less than 10
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2021 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -10, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.ETHNICITY_CONCEPT_ID IS NOT NULL
+    and (
+      lower(c.CONCEPT_NAME) not like '%unknown%'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -10, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2022 IN (@list_of_analysis_ids)}?{
+-- 2022 Count and percentage of ethnicity data completeness for age between 10~20
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2022 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -20, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -10, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.ETHNICITY_CONCEPT_ID IS NOT NULL
+    and (
+      lower(c.CONCEPT_NAME) not like '%unknown%'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -20, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -10, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2023 IN (@list_of_analysis_ids)}?{
+-- 2023 Count and percentage of ethnicity data completeness for age between 20~30
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2023 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -30, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -20, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.ETHNICITY_CONCEPT_ID IS NOT NULL
+    and (
+      lower(c.CONCEPT_NAME) not like '%unknown%'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -30, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -20, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2024 IN (@list_of_analysis_ids)}?{
+-- 2024 Count and percentage of ethnicity data completeness for age between 30~40
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2024 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -40, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -30, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID
+  WHERE
+    p.ETHNICITY_CONCEPT_ID IS NOT NULL
+    and (
+      lower(c.CONCEPT_NAME) not like '%unknown%'
+    )) valid_data,
+(SELECT count(distinct co.subject_id) as all_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -40, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -30, getdate()))
+  ) all_data) innerT;
+--}
+
+--{2025 IN (@list_of_analysis_ids)}?{
+-- 2025 Count and percentage of ethnicity data completeness for age between 40~50
+insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3)
+select @cohort_definition_id as cohort_definition_id, 2025 as analysis_id, round(innerT.valid_percentage, 2),
+innerT.all_data_count, innerT.valid_data_count
+from
+(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count,
+  CASE
+    WHEN all_data.all_data_count = 0 THEN -999
+    ELSE valid_data.valid_data_count * 100.0/all_data.all_data_count
+  END as valid_percentage
+from
+(SELECT count(distinct co.subject_id) as valid_data_count
+  FROM #HERACLES_cohort co
+  JOIN @CDM_schema.person p
+    ON co.SUBJECT_ID = p.PERSON_ID
+    AND p.YEAR_OF_BIRTH >
+      year(DATEADD(year, -50, getdate()))
+    AND p.YEAR_OF_BIRTH <=
+      year(DATEADD(year, -40, getdate()))
+  LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID
+ 
WHERE + p.ETHNICITY_CONCEPT_ID IS NOT NULL + and ( + lower(c.CONCEPT_NAME) not like '%unknown%' + )) valid_data, +(SELECT count(distinct co.subject_id) as all_data_count + FROM #HERACLES_cohort co + JOIN @CDM_schema.person p + ON co.SUBJECT_ID = p.PERSON_ID + AND p.YEAR_OF_BIRTH > + year(DATEADD(year, -50, getdate())) + AND p.YEAR_OF_BIRTH <= + year(DATEADD(year, -40, getdate())) + ) all_data) innerT; +--} + +--{2026 IN (@list_of_analysis_ids)}?{ +-- 2026 Count and percentage of ethnicity data completeness for age between 50~60 +insert into @results_schema.HERACLES_results (cohort_definition_id, analysis_id, stratum_1, stratum_2, stratum_3) +select @cohort_definition_id as cohort_definition_id, 2026 as analysis_id, round(innerT.valid_percentage, 2), +innerT.all_data_count, innerT.valid_data_count +from +(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count, + CASE + WHEN all_data.all_data_count = 0 THEN -999 + ELSE valid_data.valid_data_count * 100/all_data.all_data_count + END as valid_percentage +from +(SELECT count(distinct co.subject_id) as valid_data_count + FROM #HERACLES_cohort co + JOIN @CDM_schema.person p + ON co.SUBJECT_ID = p.PERSON_ID + AND p.YEAR_OF_BIRTH > + year(DATEADD(year, -60, getdate())) + AND p.YEAR_OF_BIRTH <= + year(DATEADD(year, -50, getdate())) + LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID + WHERE + p.ETHNICITY_CONCEPT_ID IS NOT NULL + and ( + lower(c.CONCEPT_NAME) not like '%unknown%' + )) valid_data, +(SELECT count(distinct co.subject_id) as all_data_count + FROM #HERACLES_cohort co + JOIN @CDM_schema.person p + ON co.SUBJECT_ID = p.PERSON_ID + AND p.YEAR_OF_BIRTH > + year(DATEADD(year, -60, getdate())) + AND p.YEAR_OF_BIRTH <= + year(DATEADD(year, -50, getdate())) + ) all_data) innerT; +--} + +--{2027 IN (@list_of_analysis_ids)}?{ +-- 2027 Count and percentage of ethnicity data completeness for age 60+ +insert into @results_schema.HERACLES_results (cohort_definition_id, 
analysis_id, stratum_1, stratum_2, stratum_3) +select @cohort_definition_id as cohort_definition_id, 2027 as analysis_id, round(innerT.valid_percentage, 2), +innerT.all_data_count, innerT.valid_data_count +from +(select valid_data.valid_data_count valid_data_count, all_data.all_data_count all_data_count, + CASE + WHEN all_data.all_data_count = 0 THEN -999 + ELSE valid_data.valid_data_count * 100/all_data.all_data_count + END as valid_percentage +from +(SELECT count(distinct co.subject_id) as valid_data_count + FROM #HERACLES_cohort co + JOIN @CDM_schema.person p + ON co.SUBJECT_ID = p.PERSON_ID + AND p.YEAR_OF_BIRTH <= + year(DATEADD(year, -60, getdate())) + LEFT JOIN @CDM_schema.CONCEPT c ON p.ETHNICITY_CONCEPT_ID = c.CONCEPT_ID + WHERE + p.ETHNICITY_CONCEPT_ID IS NOT NULL + and ( + lower(c.CONCEPT_NAME) not like '%unknown%' + )) valid_data, +(SELECT count(distinct co.subject_id) as all_data_count + FROM #HERACLES_cohort co + JOIN @CDM_schema.person p + ON co.SUBJECT_ID = p.PERSON_ID + AND p.YEAR_OF_BIRTH <= + year(DATEADD(year, -60, getdate())) + ) all_data) innerT; +--} + +--{2031 IN (@list_of_analysis_ids)}?{ +-- 2031 entropy +INSERT INTO @results_schema.HERACLES_results (cohort_definition_id, + analysis_id, + stratum_1, + stratum_2) + SELECT @cohort_definition_id AS cohort_definition_id, + 2031 AS analysis_id, + entropyT.d, + cast(round(entropyT.entropy, 3) as varchar(max)) + FROM +(select + obs_date as d, +-- CAST(YEAR(obs_date) as varchar(4)) + '-' + RIGHT('0' + RTRIM(MONTH(obs_date)), 2) + '-' + RIGHT('0' + RTRIM(day(obs_date)), 2) as d, +-- cast(obs_date as varchar(10)) d, + sum(probTimesLog) entropy from + (select obs_date, value_as_string, (1.0 * cnt) / (1.0 * total_per_day) prob, +-- (-1.0) * (((1.0 * cnt) / (1.0 * total_per_day))* log(2, (1.0 * cnt) / (1.0 * total_per_day))) probTimesLog, + (-1.0) * (((1.0 * cnt) / (1.0 * total_per_day)) * (log((1.0 * cnt) / (1.0 * total_per_day))/log(2))) probTimesLog, + cnt, total_per_day + from + (select obs_date, 
value_as_string, cnt, sum(cnt) + over (partition by obs_date) + as total_per_day + from + ( + select all_observ.value_as_string, + CAST(YEAR(all_observ.observation_date) as varchar(4)) + '-' + RIGHT('0' + RTRIM(MONTH(all_observ.observation_date)), 2) + '-' + RIGHT('0' + RTRIM(day(all_observ.observation_date)), 2) as obs_date, + --observation_date as obs_date, + COUNT_BIG(*) + OVER (PARTITION BY all_observ.value_as_string, +-- DATEFROMPARTS(YEAR(all_observ.observation_date),MONTH(all_observ.observation_date),DAY(all_observ.observation_date)) + CAST(YEAR(all_observ.observation_date) as varchar(4)) + '-' + RIGHT('0' + RTRIM(MONTH(all_observ.observation_date)), 2) + '-' + RIGHT('0' + RTRIM(day(all_observ.observation_date)), 2) + --trunc(all_observ.observation_date) + ) + as cnt + from + (select * from @CDM_schema.OBSERVATION observ + join #HERACLES_cohort co + on co.SUBJECT_ID = observ.PERSON_ID + and observ.observation_date >= co.cohort_start_date + and observ.observation_date <= co.cohort_end_date) all_observ + ) value_day_cnt + ) with_sum + ) allProb + group by obs_date +) entropyT; +--} + TRUNCATE TABLE #HERACLES_cohort; DROP TABLE #HERACLES_cohort; @@ -6439,4 +7255,3 @@ WHERE ord1.analysis_id IN (717) --} - diff --git a/src/main/resources/resources/cohortresults/sql/datacompleteness/getCohortDataCompleteness.sql b/src/main/resources/resources/cohortresults/sql/datacompleteness/getCohortDataCompleteness.sql new file mode 100644 index 0000000000..c1fc4ece42 --- /dev/null +++ b/src/main/resources/resources/cohortresults/sql/datacompleteness/getCohortDataCompleteness.sql @@ -0,0 +1,5 @@ +select COHORT_DEFINITION_ID, ANALYSIS_ID, STRATUM_1, STRATUM_2, STRATUM_3, STRATUM_4, STRATUM_5, count_value, last_update_time +from @tableQualifier.heracles_results +where cohort_definition_id = @cohortDefinitionId +and analysis_id in +(2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 
2028, 2029) diff --git a/src/main/resources/resources/cohortresults/sql/entropy/getEntropy.sql b/src/main/resources/resources/cohortresults/sql/entropy/getEntropy.sql new file mode 100644 index 0000000000..8a23da4209 --- /dev/null +++ b/src/main/resources/resources/cohortresults/sql/entropy/getEntropy.sql @@ -0,0 +1,4 @@ +select COHORT_DEFINITION_ID, ANALYSIS_ID, STRATUM_1, STRATUM_2, STRATUM_3, STRATUM_4, STRATUM_5, count_value, last_update_time +from @tableQualifier.heracles_results +where cohort_definition_id = @cohortDefinitionId +and analysis_id = 2031