Skip to content

Commit

Permalink
Merge pull request #1225 from synthetichealth/bfd_to_ccw
Browse files Browse the repository at this point in the history
Utility to map from the BB2 RIF format to the CCW RIF format.
  • Loading branch information
jawalonoski authored Jan 6, 2023
2 parents a9f4437 + 9c000b3 commit 340615e
Show file tree
Hide file tree
Showing 11 changed files with 1,561 additions and 0 deletions.
7 changes: 7 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ task rifMinimize(type: JavaExec) {
mainClass = "org.mitre.synthea.export.rif.tools.BB2RIFMinimizer"
}

// Runs the BB2RIF2CCW command-line tool with the main source set's runtime classpath.
task rif2CCW(type: JavaExec) {
    group = 'Application'
    description = 'Convert exported RIF files from BB2 format to CCW format'
    classpath(sourceSets.main.runtimeClasspath)
    mainClass = "org.mitre.synthea.export.rif.tools.BB2RIF2CCW"
}

shadowJar {
def versionFile = new File("$projectDir/src/main/resources/version.txt")
def versionText = "N/A"
Expand Down
110 changes: 110 additions & 0 deletions src/main/java/org/mitre/synthea/export/rif/tools/BB2RIF2CCW.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package org.mitre.synthea.export.rif.tools;

import static org.mitre.synthea.export.rif.BB2RIFStructure.RIF_FILES;

import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.SequenceWriter;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.mitre.synthea.export.Exporter;
import org.mitre.synthea.helpers.SimpleCSV;
import org.mitre.synthea.helpers.Utilities;

/**
 * Utility for converting from BB2 RIF file format to CCW RIF file format.
 *
 * <p>Each BB2 file is read as a pipe-delimited CSV with a header row. Columns are renamed
 * according to a per-file-type mapping resource ({@code export/<prefix>_bb2_ccw.csv}) and
 * columns without a non-empty CCW name in that mapping are dropped.
 */
public class BB2RIF2CCW {

  /**
   * Convert BB2 RIF file to the CCW RIF format.
   * Read in each BB2 RIF file from output/bfd, remove any unmappable columns, rename
   * mappable columns, then write the result to output/ccw.
   *
   * <p>An {@link IOException} or {@link IllegalArgumentException} raised while loading the
   * mapping for a file type is reported on standard output and that file type is skipped.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    File inputDir = Exporter.getOutputFolder("bfd", null);
    File outputDir = Exporter.getOutputFolder("ccw", null);
    outputDir.mkdirs();
    for (Class<?> rifFile: RIF_FILES) {
      // Locale.ROOT keeps the resource/file prefix stable regardless of the JVM default
      // locale (e.g. a Turkish locale would lower-case 'I' to a dotless 'ı').
      String filePrefix = rifFile.getSimpleName().toLowerCase(java.util.Locale.ROOT);
      try {
        Map<String, String> nameMap = readMapFile(filePrefix);
        for (File file: getSourceFiles(filePrefix, inputDir)) {
          System.out.println("Converting " + file.toString());
          convertFile(file, outputDir, nameMap);
        }
      } catch (IOException | IllegalArgumentException ex) {
        System.out.println("Warning, skipping " + filePrefix + ": " + ex.getMessage());
      }
    }
  }

  /**
   * Convert a single BB2 RIF file and write the result, under the same file name, into the
   * output directory.
   *
   * <p>The output schema (column names and order) is taken from the first transformed row,
   * so no output file is written when the source file contains no data rows. I/O failures
   * are reported on standard output and the file is skipped; the reader and writer are
   * closed on both the success and the failure path.
   *
   * @param file the BB2 RIF file to convert
   * @param outputDir the directory that receives the converted file
   * @param nameMap mapping of BB2 column name to CCW column name
   */
  private static void convertFile(File file, File outputDir, Map<String, String> nameMap) {
    CsvMapper mapper = new CsvMapper();
    CsvSchema schema = CsvSchema.emptySchema().withHeader().withColumnSeparator('|');
    File outputFile = outputDir.toPath().resolve(file.getName()).toFile();
    try (MappingIterator<LinkedHashMap<String, String>> sourceRows = mapper
        .readerFor(LinkedHashMap.class).with(schema).readValues(file)) {
      // The writer is created lazily because its schema depends on the first output row.
      SequenceWriter writer = null;
      try {
        while (sourceRows.hasNextValue()) {
          // nextValue() reports read problems as IOException (handled below), whereas
          // next() would wrap them in an unchecked exception and abort the whole run.
          LinkedHashMap<String, String> outputRow =
              transformRow(sourceRows.nextValue(), nameMap);
          if (writer == null) {
            CsvSchema.Builder schemaBuilder = CsvSchema.builder();
            schemaBuilder.setUseHeader(true).setColumnSeparator('|').disableQuoteChar();
            schemaBuilder.addColumns(outputRow.keySet(), CsvSchema.ColumnType.STRING);
            writer = mapper.writer(schemaBuilder.build()).writeValues(outputFile);
          }
          writer.write(outputRow);
        }
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    } catch (IOException ex) {
      System.out.println("Error, skipping " + file.getName() + ": " + ex.getMessage());
    }
  }

  /**
   * Rename the columns of a single row from BB2 names to CCW names.
   *
   * @param row the source row keyed by BB2 column name
   * @param nameMap mapping of BB2 column name to CCW column name
   * @return a new row, in source column order, containing only the columns that have a
   *     non-empty CCW name in the mapping
   */
  private static LinkedHashMap<String, String> transformRow(LinkedHashMap<String, String> row,
      Map<String, String> nameMap) {
    LinkedHashMap<String, String> transformedRow = new LinkedHashMap<>();
    for (Map.Entry<String, String> field: row.entrySet()) {
      String ccwFieldName = nameMap.get(field.getKey());
      // A missing or empty CCW name marks the column as unmappable, so it is dropped.
      if (ccwFieldName != null && !ccwFieldName.isEmpty()) {
        transformedRow.put(ccwFieldName, field.getValue());
      }
    }
    return transformedRow;
  }

  /**
   * Load the BB2 to CCW column name mapping for a RIF file type from the resource
   * {@code export/<filePrefix>_bb2_ccw.csv}, using its {@code BB2} and {@code CCW} columns.
   *
   * @param filePrefix lower-case RIF file type name, e.g. {@code beneficiary}
   * @return mapping of BB2 column name to CCW column name
   * @throws IOException if the mapping resource cannot be read or parsed
   */
  private static Map<String, String> readMapFile(String filePrefix) throws IOException {
    String csvStr = Utilities.readResource("export/" + filePrefix + "_bb2_ccw.csv");
    List<LinkedHashMap<String,String>> csv = SimpleCSV.parse(csvStr);
    Map<String, String> map = new HashMap<>();
    for (LinkedHashMap<String, String> entry: csv) {
      map.put(entry.get("BB2"), entry.get("CCW"));
    }
    return map;
  }

  /**
   * Find the files in the input directory whose names start with the supplied prefix.
   * For the {@code beneficiary} prefix, file names containing {@code history} are excluded.
   *
   * @param filePrefix lower-case RIF file type name
   * @param inputDir the directory to search
   * @return the matching files; an empty array if there are none or the directory cannot
   *     be listed
   */
  private static File[] getSourceFiles(String filePrefix, File inputDir) {
    // NOTE(review): presumably the history files share the "beneficiary" name prefix but
    // need a different mapping - confirm against the exporter's file naming.
    boolean excludeHistory = filePrefix.equals("beneficiary");
    File[] files = inputDir.listFiles((dir, filename) ->
        filename.startsWith(filePrefix) && !(excludeHistory && filename.contains("history")));
    // listFiles returns null when inputDir is not a directory or cannot be read.
    return files != null ? files : new File[0];
  }
}
211 changes: 211 additions & 0 deletions src/main/resources/export/beneficiary_bb2_ccw.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
BB2,CCW
AGE,AGE_AT_END_REF_YR
A_MO_CNT,BENE_HI_CVRAGE_TOT_MONS
BENE_BIRTH_DT,BENE_BIRTH_DT
BENE_COUNTY_CD,COUNTY_CD
BENE_CRNT_HIC_NUM,
BENE_ENTLMT_RSN_CURR,ENTLMT_RSN_CURR
BENE_ENTLMT_RSN_ORIG,ENTLMT_RSN_ORIG
BENE_ESRD_IND,ESRD_IND
BENE_GVN_NAME,
BENE_ID,BENE_ID
BENE_LINK_KEY,
BENE_MDCR_STATUS_CD,
BENE_MDL_NAME,
BENE_PTA_TRMNTN_CD,BENE_PTA_TRMNTN_CD
BENE_PTB_TRMNTN_CD,BENE_PTB_TRMNTN_CD
BENE_RACE_CD,BENE_RACE_CD
BENE_SEX_IDENT_CD,SEX_IDENT_CD
BENE_SRNM_NAME,
BENE_ZIP_CD,ZIP_CD
BUYIN_MO_CNT,BENE_STATE_BUYIN_TOT_MONS
B_MO_CNT,BENE_SMI_CVRAGE_TOT_MONS
CITY_NAME,
COVSTART,COVSTART
CRNT_BIC,CRNT_BIC_CD
CST_SHR_GRP_APR_CD,CST_SHR_GRP_CD_04
CST_SHR_GRP_AUG_CD,CST_SHR_GRP_CD_08
CST_SHR_GRP_DEC_CD,CST_SHR_GRP_CD_12
CST_SHR_GRP_FEB_CD,CST_SHR_GRP_CD_02
CST_SHR_GRP_JAN_CD,CST_SHR_GRP_CD_01
CST_SHR_GRP_JUL_CD,CST_SHR_GRP_CD_07
CST_SHR_GRP_JUN_CD,CST_SHR_GRP_CD_06
CST_SHR_GRP_MAR_CD,CST_SHR_GRP_CD_03
CST_SHR_GRP_MAY_CD,CST_SHR_GRP_CD_05
CST_SHR_GRP_NOV_CD,CST_SHR_GRP_CD_11
CST_SHR_GRP_OCT_CD,CST_SHR_GRP_CD_10
CST_SHR_GRP_SEPT_CD,CST_SHR_GRP_CD_09
DEATH_DT,BENE_DEATH_DT
DML_IND,
DRVD_LINE_1_ADR,
DRVD_LINE_2_ADR,
DRVD_LINE_3_ADR,
DRVD_LINE_4_ADR,
DRVD_LINE_5_ADR,
DRVD_LINE_6_ADR,
DUAL_MO_CNT,DUAL_ELGBL_MONS
EFCTV_BGN_DT,
EFCTV_END_DT,
EFIVEPCT,ENHANCED_FIVE_PERCENT_FLAG
ENRL_SRC,ENRL_SRC
FIPS_STATE_CNTY_APR_CD,STATE_CNTY_FIPS_CD_04
FIPS_STATE_CNTY_AUG_CD,STATE_CNTY_FIPS_CD_08
FIPS_STATE_CNTY_DEC_CD,STATE_CNTY_FIPS_CD_12
FIPS_STATE_CNTY_FEB_CD,STATE_CNTY_FIPS_CD_02
FIPS_STATE_CNTY_JAN_CD,STATE_CNTY_FIPS_CD_01
FIPS_STATE_CNTY_JUL_CD,STATE_CNTY_FIPS_CD_07
FIPS_STATE_CNTY_JUN_CD,STATE_CNTY_FIPS_CD_06
FIPS_STATE_CNTY_MAR_CD,STATE_CNTY_FIPS_CD_03
FIPS_STATE_CNTY_MAY_CD,STATE_CNTY_FIPS_CD_05
FIPS_STATE_CNTY_NOV_CD,STATE_CNTY_FIPS_CD_11
FIPS_STATE_CNTY_OCT_CD,STATE_CNTY_FIPS_CD_10
FIPS_STATE_CNTY_SEPT_CD,STATE_CNTY_FIPS_CD_09
HMO_10_IND,HMO_IND_10
HMO_11_IND,HMO_IND_11
HMO_12_IND,HMO_IND_12
HMO_1_IND,HMO_IND_01
HMO_2_IND,HMO_IND_02
HMO_3_IND,HMO_IND_03
HMO_4_IND,HMO_IND_04
HMO_5_IND,HMO_IND_05
HMO_6_IND,HMO_IND_06
HMO_7_IND,HMO_IND_07
HMO_8_IND,HMO_IND_08
HMO_9_IND,HMO_IND_09
HMO_MO_CNT,BENE_HMO_CVRAGE_TOT_MONS
MBI_NUM,
MDCR_ENTLMT_BUYIN_10_IND,MDCR_ENTLMT_BUYIN_IND_10
MDCR_ENTLMT_BUYIN_11_IND,MDCR_ENTLMT_BUYIN_IND_11
MDCR_ENTLMT_BUYIN_12_IND,MDCR_ENTLMT_BUYIN_IND_12
MDCR_ENTLMT_BUYIN_1_IND,MDCR_ENTLMT_BUYIN_IND_01
MDCR_ENTLMT_BUYIN_2_IND,MDCR_ENTLMT_BUYIN_IND_02
MDCR_ENTLMT_BUYIN_3_IND,MDCR_ENTLMT_BUYIN_IND_03
MDCR_ENTLMT_BUYIN_4_IND,MDCR_ENTLMT_BUYIN_IND_04
MDCR_ENTLMT_BUYIN_5_IND,MDCR_ENTLMT_BUYIN_IND_05
MDCR_ENTLMT_BUYIN_6_IND,MDCR_ENTLMT_BUYIN_IND_06
MDCR_ENTLMT_BUYIN_7_IND,MDCR_ENTLMT_BUYIN_IND_07
MDCR_ENTLMT_BUYIN_8_IND,MDCR_ENTLMT_BUYIN_IND_08
MDCR_ENTLMT_BUYIN_9_IND,MDCR_ENTLMT_BUYIN_IND_09
MDCR_STUS_APR_CD,MDCR_STATUS_CODE_04
MDCR_STUS_AUG_CD,MDCR_STATUS_CODE_08
MDCR_STUS_DEC_CD,MDCR_STATUS_CODE_12
MDCR_STUS_FEB_CD,MDCR_STATUS_CODE_02
MDCR_STUS_JAN_CD,MDCR_STATUS_CODE_01
MDCR_STUS_JUL_CD,MDCR_STATUS_CODE_07
MDCR_STUS_JUN_CD,MDCR_STATUS_CODE_06
MDCR_STUS_MAR_CD,MDCR_STATUS_CODE_03
MDCR_STUS_MAY_CD,MDCR_STATUS_CODE_05
MDCR_STUS_NOV_CD,MDCR_STATUS_CODE_11
MDCR_STUS_OCT_CD,MDCR_STATUS_CODE_10
MDCR_STUS_SEPT_CD,MDCR_STATUS_CODE_09
META_DUAL_ELGBL_STUS_APR_CD,DUAL_STUS_CD_04
META_DUAL_ELGBL_STUS_AUG_CD,DUAL_STUS_CD_08
META_DUAL_ELGBL_STUS_DEC_CD,DUAL_STUS_CD_12
META_DUAL_ELGBL_STUS_FEB_CD,DUAL_STUS_CD_02
META_DUAL_ELGBL_STUS_JAN_CD,DUAL_STUS_CD_01
META_DUAL_ELGBL_STUS_JUL_CD,DUAL_STUS_CD_07
META_DUAL_ELGBL_STUS_JUN_CD,DUAL_STUS_CD_06
META_DUAL_ELGBL_STUS_MAR_CD,DUAL_STUS_CD_03
META_DUAL_ELGBL_STUS_MAY_CD,DUAL_STUS_CD_05
META_DUAL_ELGBL_STUS_NOV_CD,DUAL_STUS_CD_11
META_DUAL_ELGBL_STUS_OCT_CD,DUAL_STUS_CD_10
META_DUAL_ELGBL_STUS_SEPT_CD,DUAL_STUS_CD_09
PLAN_CVRG_MO_CNT,PTD_PLAN_CVRG_MONS
PTA_CVRG_END_DT,
PTA_CVRG_STRT_DT,
PTB_CVRG_END_DT,
PTB_CVRG_STRT_DT,
PTC_CNTRCT_APR_ID,PTC_CNTRCT_ID_04
PTC_CNTRCT_AUG_ID,PTC_CNTRCT_ID_08
PTC_CNTRCT_DEC_ID,PTC_CNTRCT_ID_12
PTC_CNTRCT_FEB_ID,PTC_CNTRCT_ID_02
PTC_CNTRCT_JAN_ID,PTC_CNTRCT_ID_01
PTC_CNTRCT_JUL_ID,PTC_CNTRCT_ID_07
PTC_CNTRCT_JUN_ID,PTC_CNTRCT_ID_06
PTC_CNTRCT_MAR_ID,PTC_CNTRCT_ID_03
PTC_CNTRCT_MAY_ID,PTC_CNTRCT_ID_05
PTC_CNTRCT_NOV_ID,PTC_CNTRCT_ID_11
PTC_CNTRCT_OCT_ID,PTC_CNTRCT_ID_10
PTC_CNTRCT_SEPT_ID,PTC_CNTRCT_ID_09
PTC_PBP_APR_ID,PTC_PBP_ID_04
PTC_PBP_AUG_ID,PTC_PBP_ID_08
PTC_PBP_DEC_ID,PTC_PBP_ID_12
PTC_PBP_FEB_ID,PTC_PBP_ID_02
PTC_PBP_JAN_ID,PTC_PBP_ID_01
PTC_PBP_JUL_ID,PTC_PBP_ID_07
PTC_PBP_JUN_ID,PTC_PBP_ID_06
PTC_PBP_MAR_ID,PTC_PBP_ID_03
PTC_PBP_MAY_ID,PTC_PBP_ID_05
PTC_PBP_NOV_ID,PTC_PBP_ID_11
PTC_PBP_OCT_ID,PTC_PBP_ID_10
PTC_PBP_SEPT_ID,PTC_PBP_ID_09
PTC_PLAN_TYPE_APR_CD,PTC_PLAN_TYPE_CD_04
PTC_PLAN_TYPE_AUG_CD,PTC_PLAN_TYPE_CD_08
PTC_PLAN_TYPE_DEC_CD,PTC_PLAN_TYPE_CD_12
PTC_PLAN_TYPE_FEB_CD,PTC_PLAN_TYPE_CD_02
PTC_PLAN_TYPE_JAN_CD,PTC_PLAN_TYPE_CD_01
PTC_PLAN_TYPE_JUL_CD,PTC_PLAN_TYPE_CD_07
PTC_PLAN_TYPE_JUN_CD,PTC_PLAN_TYPE_CD_06
PTC_PLAN_TYPE_MAR_CD,PTC_PLAN_TYPE_CD_03
PTC_PLAN_TYPE_MAY_CD,PTC_PLAN_TYPE_CD_05
PTC_PLAN_TYPE_NOV_CD,PTC_PLAN_TYPE_CD_11
PTC_PLAN_TYPE_OCT_CD,PTC_PLAN_TYPE_CD_10
PTC_PLAN_TYPE_SEPT_CD,PTC_PLAN_TYPE_CD_09
PTD_CNTRCT_APR_ID,PTD_CNTRCT_ID_04
PTD_CNTRCT_AUG_ID,PTD_CNTRCT_ID_08
PTD_CNTRCT_DEC_ID,PTD_CNTRCT_ID_12
PTD_CNTRCT_FEB_ID,PTD_CNTRCT_ID_02
PTD_CNTRCT_JAN_ID,PTD_CNTRCT_ID_01
PTD_CNTRCT_JUL_ID,PTD_CNTRCT_ID_07
PTD_CNTRCT_JUN_ID,PTD_CNTRCT_ID_06
PTD_CNTRCT_MAR_ID,PTD_CNTRCT_ID_03
PTD_CNTRCT_MAY_ID,PTD_CNTRCT_ID_05
PTD_CNTRCT_NOV_ID,PTD_CNTRCT_ID_11
PTD_CNTRCT_OCT_ID,PTD_CNTRCT_ID_10
PTD_CNTRCT_SEPT_ID,PTD_CNTRCT_ID_09
PTD_CVRG_END_DT,
PTD_CVRG_STRT_DT,
PTD_PBP_APR_ID,PTD_PBP_ID_04
PTD_PBP_AUG_ID,PTD_PBP_ID_08
PTD_PBP_DEC_ID,PTD_PBP_ID_12
PTD_PBP_FEB_ID,PTD_PBP_ID_02
PTD_PBP_JAN_ID,PTD_PBP_ID_01
PTD_PBP_JUL_ID,PTD_PBP_ID_07
PTD_PBP_JUN_ID,PTD_PBP_ID_06
PTD_PBP_MAR_ID,PTD_PBP_ID_03
PTD_PBP_MAY_ID,PTD_PBP_ID_05
PTD_PBP_NOV_ID,PTD_PBP_ID_11
PTD_PBP_OCT_ID,PTD_PBP_ID_10
PTD_PBP_SEPT_ID,PTD_PBP_ID_09
PTD_SGMT_APR_ID,PTD_SGMT_ID_04
PTD_SGMT_AUG_ID,PTD_SGMT_ID_08
PTD_SGMT_DEC_ID,PTD_SGMT_ID_12
PTD_SGMT_FEB_ID,PTD_SGMT_ID_02
PTD_SGMT_JAN_ID,PTD_SGMT_ID_01
PTD_SGMT_JUL_ID,PTD_SGMT_ID_07
PTD_SGMT_JUN_ID,PTD_SGMT_ID_06
PTD_SGMT_MAR_ID,PTD_SGMT_ID_03
PTD_SGMT_MAY_ID,PTD_SGMT_ID_05
PTD_SGMT_NOV_ID,PTD_SGMT_ID_11
PTD_SGMT_OCT_ID,PTD_SGMT_ID_10
PTD_SGMT_SEPT_ID,PTD_SGMT_ID_09
RDS_APR_IND,RDS_IND_04
RDS_AUG_IND,RDS_IND_08
RDS_DEC_IND,RDS_IND_12
RDS_FEB_IND,RDS_IND_02
RDS_JAN_IND,RDS_IND_01
RDS_JUL_IND,RDS_IND_07
RDS_JUN_IND,RDS_IND_06
RDS_MAR_IND,RDS_IND_03
RDS_MAY_IND,RDS_IND_05
RDS_MO_CNT,RDS_CVRG_MONS
RDS_NOV_IND,RDS_IND_11
RDS_OCT_IND,RDS_IND_10
RDS_SEPT_IND,RDS_IND_09
RFRNC_YR,BENE_ENROLLMT_REF_YR
RTI_RACE_CD,RTI_RACE_CD
SAMPLE_GROUP,SAMPLE_GROUP
STATE_CD,
STATE_CNTY_ZIP_CD,
STATE_CODE,STATE_CODE
V_DOD_SW,VALID_DEATH_DT_SW
Loading

0 comments on commit 340615e

Please sign in to comment.