-
Notifications
You must be signed in to change notification settings - Fork 680
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1225 from synthetichealth/bfd_to_ccw
Utility to map from the BB2 RIF format to the CCW RIF format.
- Loading branch information
Showing
11 changed files
with
1,561 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 110 additions & 0 deletions
110
src/main/java/org/mitre/synthea/export/rif/tools/BB2RIF2CCW.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
package org.mitre.synthea.export.rif.tools; | ||
|
||
import static org.mitre.synthea.export.rif.BB2RIFStructure.RIF_FILES; | ||
|
||
import com.fasterxml.jackson.databind.MappingIterator; | ||
import com.fasterxml.jackson.databind.SequenceWriter; | ||
import com.fasterxml.jackson.dataformat.csv.CsvMapper; | ||
import com.fasterxml.jackson.dataformat.csv.CsvSchema; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import org.mitre.synthea.export.Exporter; | ||
import org.mitre.synthea.helpers.SimpleCSV; | ||
import org.mitre.synthea.helpers.Utilities; | ||
|
||
/** | ||
* Utility for converting from BB2 RIF file format to CCW RIF file format. | ||
*/ | ||
public class BB2RIF2CCW { | ||
|
||
/** | ||
* Convert BB2 RIF file to the CCW RIF format. | ||
* Read in each BB2 RIF file from output/bfd, remove any unmappable columns, rename | ||
* mappable columns, then write the result to output/ccw. | ||
* @param args unused | ||
*/ | ||
public static void main(String[] args) { | ||
File inputDir = Exporter.getOutputFolder("bfd", null); | ||
File outputDir = Exporter.getOutputFolder("ccw", null); | ||
outputDir.mkdirs(); | ||
for (Class<?> rifFile: RIF_FILES) { | ||
String filePrefix = rifFile.getSimpleName().toLowerCase(); | ||
try { | ||
Map<String, String> nameMap = readMapFile(filePrefix); | ||
for (File file: getSourceFiles(filePrefix, inputDir)) { | ||
System.out.println("Converting " + file.toString()); | ||
convertFile(file, outputDir, nameMap); | ||
} | ||
} catch (IOException | IllegalArgumentException ex) { | ||
System.out.println("Warning, skipping " + filePrefix + ": " + ex.getMessage()); | ||
} | ||
} | ||
} | ||
|
||
private static void convertFile(File file, File outputDir, Map<String, String> nameMap) { | ||
try { | ||
CsvMapper mapper = new CsvMapper(); | ||
CsvSchema schema = CsvSchema.emptySchema().withHeader().withColumnSeparator('|'); | ||
MappingIterator<LinkedHashMap<String, String>> sourceRows = mapper | ||
.readerFor(LinkedHashMap.class).with(schema).readValues(file); | ||
boolean firstOutputRow = true; | ||
SequenceWriter writer = null; | ||
File outputFile = outputDir.toPath().resolve(file.getName()).toFile(); | ||
while (sourceRows.hasNextValue()) { | ||
LinkedHashMap<String, String> outputRow = transformRow(sourceRows.next(), nameMap); | ||
if (firstOutputRow) { | ||
CsvSchema.Builder schemaBuilder = CsvSchema.builder(); | ||
schemaBuilder.setUseHeader(true).setColumnSeparator('|').disableQuoteChar(); | ||
schemaBuilder.addColumns(outputRow.keySet(), CsvSchema.ColumnType.STRING); | ||
writer = mapper.writer(schemaBuilder.build()).writeValues(outputFile); | ||
firstOutputRow = false; | ||
} | ||
writer.write(outputRow); | ||
} | ||
if (writer != null) { | ||
writer.close(); | ||
} | ||
sourceRows.close(); | ||
} catch (IOException ex) { | ||
System.out.println("Error, skipping " + file.getName() + ": " + ex.getMessage());; | ||
} | ||
} | ||
|
||
private static LinkedHashMap<String, String> transformRow(LinkedHashMap<String, String> row, | ||
Map<String, String> nameMap) { | ||
LinkedHashMap<String, String> transformedRow = new LinkedHashMap<>(); | ||
row.keySet().forEach(bb2FieldName -> { | ||
String ccwFieldName = nameMap.get(bb2FieldName); | ||
if (ccwFieldName != null && ccwFieldName.length() > 0) { | ||
transformedRow.put(ccwFieldName, row.get(bb2FieldName)); | ||
} | ||
}); | ||
return transformedRow; | ||
} | ||
|
||
private static Map<String, String> readMapFile(String filePrefix) throws IOException { | ||
String csvStr = Utilities.readResource("export/" + filePrefix + "_bb2_ccw.csv"); | ||
List<LinkedHashMap<String,String>> csv = SimpleCSV.parse(csvStr); | ||
HashMap<String, String> map = new HashMap<>(); | ||
csv.forEach(entry -> { | ||
map.put(entry.get("BB2"), entry.get("CCW")); | ||
}); | ||
return map; | ||
} | ||
|
||
private static File[] getSourceFiles(String filePrefix, File inputDir) { | ||
if (filePrefix.equals("beneficiary")) { | ||
return inputDir.listFiles((dir, filename) -> { | ||
return filename.startsWith(filePrefix) && !filename.contains("history"); | ||
}); | ||
} else { | ||
return inputDir.listFiles((dir, filename) -> { | ||
return filename.startsWith(filePrefix); | ||
}); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
BB2,CCW | ||
AGE,AGE_AT_END_REF_YR | ||
A_MO_CNT,BENE_HI_CVRAGE_TOT_MONS | ||
BENE_BIRTH_DT,BENE_BIRTH_DT | ||
BENE_COUNTY_CD,COUNTY_CD | ||
BENE_CRNT_HIC_NUM, | ||
BENE_ENTLMT_RSN_CURR,ENTLMT_RSN_CURR | ||
BENE_ENTLMT_RSN_ORIG,ENTLMT_RSN_ORIG | ||
BENE_ESRD_IND,ESRD_IND | ||
BENE_GVN_NAME, | ||
BENE_ID,BENE_ID | ||
BENE_LINK_KEY, | ||
BENE_MDCR_STATUS_CD, | ||
BENE_MDL_NAME, | ||
BENE_PTA_TRMNTN_CD,BENE_PTA_TRMNTN_CD | ||
BENE_PTB_TRMNTN_CD,BENE_PTB_TRMNTN_CD | ||
BENE_RACE_CD,BENE_RACE_CD | ||
BENE_SEX_IDENT_CD,SEX_IDENT_CD | ||
BENE_SRNM_NAME, | ||
BENE_ZIP_CD,ZIP_CD | ||
BUYIN_MO_CNT,BENE_STATE_BUYIN_TOT_MONS | ||
B_MO_CNT,BENE_SMI_CVRAGE_TOT_MONS | ||
CITY_NAME, | ||
COVSTART,COVSTART | ||
CRNT_BIC,CRNT_BIC_CD | ||
CST_SHR_GRP_APR_CD,CST_SHR_GRP_CD_04 | ||
CST_SHR_GRP_AUG_CD,CST_SHR_GRP_CD_08 | ||
CST_SHR_GRP_DEC_CD,CST_SHR_GRP_CD_12 | ||
CST_SHR_GRP_FEB_CD,CST_SHR_GRP_CD_02 | ||
CST_SHR_GRP_JAN_CD,CST_SHR_GRP_CD_01 | ||
CST_SHR_GRP_JUL_CD,CST_SHR_GRP_CD_07 | ||
CST_SHR_GRP_JUN_CD,CST_SHR_GRP_CD_06 | ||
CST_SHR_GRP_MAR_CD,CST_SHR_GRP_CD_03 | ||
CST_SHR_GRP_MAY_CD,CST_SHR_GRP_CD_05 | ||
CST_SHR_GRP_NOV_CD,CST_SHR_GRP_CD_11 | ||
CST_SHR_GRP_OCT_CD,CST_SHR_GRP_CD_10 | ||
CST_SHR_GRP_SEPT_CD,CST_SHR_GRP_CD_09 | ||
DEATH_DT,BENE_DEATH_DT | ||
DML_IND, | ||
DRVD_LINE_1_ADR, | ||
DRVD_LINE_2_ADR, | ||
DRVD_LINE_3_ADR, | ||
DRVD_LINE_4_ADR, | ||
DRVD_LINE_5_ADR, | ||
DRVD_LINE_6_ADR, | ||
DUAL_MO_CNT,DUAL_ELGBL_MONS | ||
EFCTV_BGN_DT, | ||
EFCTV_END_DT, | ||
EFIVEPCT,ENHANCED_FIVE_PERCENT_FLAG | ||
ENRL_SRC,ENRL_SRC | ||
FIPS_STATE_CNTY_APR_CD,STATE_CNTY_FIPS_CD_04 | ||
FIPS_STATE_CNTY_AUG_CD,STATE_CNTY_FIPS_CD_08 | ||
FIPS_STATE_CNTY_DEC_CD,STATE_CNTY_FIPS_CD_12 | ||
FIPS_STATE_CNTY_FEB_CD,STATE_CNTY_FIPS_CD_02 | ||
FIPS_STATE_CNTY_JAN_CD,STATE_CNTY_FIPS_CD_01 | ||
FIPS_STATE_CNTY_JUL_CD,STATE_CNTY_FIPS_CD_07 | ||
FIPS_STATE_CNTY_JUN_CD,STATE_CNTY_FIPS_CD_06 | ||
FIPS_STATE_CNTY_MAR_CD,STATE_CNTY_FIPS_CD_03 | ||
FIPS_STATE_CNTY_MAY_CD,STATE_CNTY_FIPS_CD_05 | ||
FIPS_STATE_CNTY_NOV_CD,STATE_CNTY_FIPS_CD_11 | ||
FIPS_STATE_CNTY_OCT_CD,STATE_CNTY_FIPS_CD_10 | ||
FIPS_STATE_CNTY_SEPT_CD,STATE_CNTY_FIPS_CD_09 | ||
HMO_10_IND,HMO_IND_10 | ||
HMO_11_IND,HMO_IND_11 | ||
HMO_12_IND,HMO_IND_12 | ||
HMO_1_IND,HMO_IND_01 | ||
HMO_2_IND,HMO_IND_02 | ||
HMO_3_IND,HMO_IND_03 | ||
HMO_4_IND,HMO_IND_04 | ||
HMO_5_IND,HMO_IND_05 | ||
HMO_6_IND,HMO_IND_06 | ||
HMO_7_IND,HMO_IND_07 | ||
HMO_8_IND,HMO_IND_08 | ||
HMO_9_IND,HMO_IND_09 | ||
HMO_MO_CNT,BENE_HMO_CVRAGE_TOT_MONS | ||
MBI_NUM, | ||
MDCR_ENTLMT_BUYIN_10_IND,MDCR_ENTLMT_BUYIN_IND_10 | ||
MDCR_ENTLMT_BUYIN_11_IND,MDCR_ENTLMT_BUYIN_IND_11 | ||
MDCR_ENTLMT_BUYIN_12_IND,MDCR_ENTLMT_BUYIN_IND_12 | ||
MDCR_ENTLMT_BUYIN_1_IND,MDCR_ENTLMT_BUYIN_IND_01 | ||
MDCR_ENTLMT_BUYIN_2_IND,MDCR_ENTLMT_BUYIN_IND_02 | ||
MDCR_ENTLMT_BUYIN_3_IND,MDCR_ENTLMT_BUYIN_IND_03 | ||
MDCR_ENTLMT_BUYIN_4_IND,MDCR_ENTLMT_BUYIN_IND_04 | ||
MDCR_ENTLMT_BUYIN_5_IND,MDCR_ENTLMT_BUYIN_IND_05 | ||
MDCR_ENTLMT_BUYIN_6_IND,MDCR_ENTLMT_BUYIN_IND_06 | ||
MDCR_ENTLMT_BUYIN_7_IND,MDCR_ENTLMT_BUYIN_IND_07 | ||
MDCR_ENTLMT_BUYIN_8_IND,MDCR_ENTLMT_BUYIN_IND_08 | ||
MDCR_ENTLMT_BUYIN_9_IND,MDCR_ENTLMT_BUYIN_IND_09 | ||
MDCR_STUS_APR_CD,MDCR_STATUS_CODE_04 | ||
MDCR_STUS_AUG_CD,MDCR_STATUS_CODE_08 | ||
MDCR_STUS_DEC_CD,MDCR_STATUS_CODE_12 | ||
MDCR_STUS_FEB_CD,MDCR_STATUS_CODE_02 | ||
MDCR_STUS_JAN_CD,MDCR_STATUS_CODE_01 | ||
MDCR_STUS_JUL_CD,MDCR_STATUS_CODE_07 | ||
MDCR_STUS_JUN_CD,MDCR_STATUS_CODE_06 | ||
MDCR_STUS_MAR_CD,MDCR_STATUS_CODE_03 | ||
MDCR_STUS_MAY_CD,MDCR_STATUS_CODE_05 | ||
MDCR_STUS_NOV_CD,MDCR_STATUS_CODE_11 | ||
MDCR_STUS_OCT_CD,MDCR_STATUS_CODE_10 | ||
MDCR_STUS_SEPT_CD,MDCR_STATUS_CODE_09 | ||
META_DUAL_ELGBL_STUS_APR_CD,DUAL_STUS_CD_04 | ||
META_DUAL_ELGBL_STUS_AUG_CD,DUAL_STUS_CD_08 | ||
META_DUAL_ELGBL_STUS_DEC_CD,DUAL_STUS_CD_12 | ||
META_DUAL_ELGBL_STUS_FEB_CD,DUAL_STUS_CD_02 | ||
META_DUAL_ELGBL_STUS_JAN_CD,DUAL_STUS_CD_01 | ||
META_DUAL_ELGBL_STUS_JUL_CD,DUAL_STUS_CD_07 | ||
META_DUAL_ELGBL_STUS_JUN_CD,DUAL_STUS_CD_06 | ||
META_DUAL_ELGBL_STUS_MAR_CD,DUAL_STUS_CD_03 | ||
META_DUAL_ELGBL_STUS_MAY_CD,DUAL_STUS_CD_05 | ||
META_DUAL_ELGBL_STUS_NOV_CD,DUAL_STUS_CD_11 | ||
META_DUAL_ELGBL_STUS_OCT_CD,DUAL_STUS_CD_10 | ||
META_DUAL_ELGBL_STUS_SEPT_CD,DUAL_STUS_CD_09 | ||
PLAN_CVRG_MO_CNT,PTD_PLAN_CVRG_MONS | ||
PTA_CVRG_END_DT, | ||
PTA_CVRG_STRT_DT, | ||
PTB_CVRG_END_DT, | ||
PTB_CVRG_STRT_DT, | ||
PTC_CNTRCT_APR_ID,PTC_CNTRCT_ID_04 | ||
PTC_CNTRCT_AUG_ID,PTC_CNTRCT_ID_08 | ||
PTC_CNTRCT_DEC_ID,PTC_CNTRCT_ID_12 | ||
PTC_CNTRCT_FEB_ID,PTC_CNTRCT_ID_02 | ||
PTC_CNTRCT_JAN_ID,PTC_CNTRCT_ID_01 | ||
PTC_CNTRCT_JUL_ID,PTC_CNTRCT_ID_07 | ||
PTC_CNTRCT_JUN_ID,PTC_CNTRCT_ID_06 | ||
PTC_CNTRCT_MAR_ID,PTC_CNTRCT_ID_03 | ||
PTC_CNTRCT_MAY_ID,PTC_CNTRCT_ID_05 | ||
PTC_CNTRCT_NOV_ID,PTC_CNTRCT_ID_11 | ||
PTC_CNTRCT_OCT_ID,PTC_CNTRCT_ID_10 | ||
PTC_CNTRCT_SEPT_ID,PTC_CNTRCT_ID_09 | ||
PTC_PBP_APR_ID,PTC_PBP_ID_04 | ||
PTC_PBP_AUG_ID,PTC_PBP_ID_08 | ||
PTC_PBP_DEC_ID,PTC_PBP_ID_12 | ||
PTC_PBP_FEB_ID,PTC_PBP_ID_02 | ||
PTC_PBP_JAN_ID,PTC_PBP_ID_01 | ||
PTC_PBP_JUL_ID,PTC_PBP_ID_07 | ||
PTC_PBP_JUN_ID,PTC_PBP_ID_06 | ||
PTC_PBP_MAR_ID,PTC_PBP_ID_03 | ||
PTC_PBP_MAY_ID,PTC_PBP_ID_05 | ||
PTC_PBP_NOV_ID,PTC_PBP_ID_11 | ||
PTC_PBP_OCT_ID,PTC_PBP_ID_10 | ||
PTC_PBP_SEPT_ID,PTC_PBP_ID_09 | ||
PTC_PLAN_TYPE_APR_CD,PTC_PLAN_TYPE_CD_04 | ||
PTC_PLAN_TYPE_AUG_CD,PTC_PLAN_TYPE_CD_08 | ||
PTC_PLAN_TYPE_DEC_CD,PTC_PLAN_TYPE_CD_12 | ||
PTC_PLAN_TYPE_FEB_CD,PTC_PLAN_TYPE_CD_02 | ||
PTC_PLAN_TYPE_JAN_CD,PTC_PLAN_TYPE_CD_01 | ||
PTC_PLAN_TYPE_JUL_CD,PTC_PLAN_TYPE_CD_07 | ||
PTC_PLAN_TYPE_JUN_CD,PTC_PLAN_TYPE_CD_06 | ||
PTC_PLAN_TYPE_MAR_CD,PTC_PLAN_TYPE_CD_03 | ||
PTC_PLAN_TYPE_MAY_CD,PTC_PLAN_TYPE_CD_05 | ||
PTC_PLAN_TYPE_NOV_CD,PTC_PLAN_TYPE_CD_11 | ||
PTC_PLAN_TYPE_OCT_CD,PTC_PLAN_TYPE_CD_10 | ||
PTC_PLAN_TYPE_SEPT_CD,PTC_PLAN_TYPE_CD_09 | ||
PTD_CNTRCT_APR_ID,PTD_CNTRCT_ID_04 | ||
PTD_CNTRCT_AUG_ID,PTD_CNTRCT_ID_08 | ||
PTD_CNTRCT_DEC_ID,PTD_CNTRCT_ID_12 | ||
PTD_CNTRCT_FEB_ID,PTD_CNTRCT_ID_02 | ||
PTD_CNTRCT_JAN_ID,PTD_CNTRCT_ID_01 | ||
PTD_CNTRCT_JUL_ID,PTD_CNTRCT_ID_07 | ||
PTD_CNTRCT_JUN_ID,PTD_CNTRCT_ID_06 | ||
PTD_CNTRCT_MAR_ID,PTD_CNTRCT_ID_03 | ||
PTD_CNTRCT_MAY_ID,PTD_CNTRCT_ID_05 | ||
PTD_CNTRCT_NOV_ID,PTD_CNTRCT_ID_11 | ||
PTD_CNTRCT_OCT_ID,PTD_CNTRCT_ID_10 | ||
PTD_CNTRCT_SEPT_ID,PTD_CNTRCT_ID_09 | ||
PTD_CVRG_END_DT, | ||
PTD_CVRG_STRT_DT, | ||
PTD_PBP_APR_ID,PTD_PBP_ID_04 | ||
PTD_PBP_AUG_ID,PTD_PBP_ID_08 | ||
PTD_PBP_DEC_ID,PTD_PBP_ID_12 | ||
PTD_PBP_FEB_ID,PTD_PBP_ID_02 | ||
PTD_PBP_JAN_ID,PTD_PBP_ID_01 | ||
PTD_PBP_JUL_ID,PTD_PBP_ID_07 | ||
PTD_PBP_JUN_ID,PTD_PBP_ID_06 | ||
PTD_PBP_MAR_ID,PTD_PBP_ID_03 | ||
PTD_PBP_MAY_ID,PTD_PBP_ID_05 | ||
PTD_PBP_NOV_ID,PTD_PBP_ID_11 | ||
PTD_PBP_OCT_ID,PTD_PBP_ID_10 | ||
PTD_PBP_SEPT_ID,PTD_PBP_ID_09 | ||
PTD_SGMT_APR_ID,PTD_SGMT_ID_04 | ||
PTD_SGMT_AUG_ID,PTD_SGMT_ID_08 | ||
PTD_SGMT_DEC_ID,PTD_SGMT_ID_12 | ||
PTD_SGMT_FEB_ID,PTD_SGMT_ID_02 | ||
PTD_SGMT_JAN_ID,PTD_SGMT_ID_01 | ||
PTD_SGMT_JUL_ID,PTD_SGMT_ID_07 | ||
PTD_SGMT_JUN_ID,PTD_SGMT_ID_06 | ||
PTD_SGMT_MAR_ID,PTD_SGMT_ID_03 | ||
PTD_SGMT_MAY_ID,PTD_SGMT_ID_05 | ||
PTD_SGMT_NOV_ID,PTD_SGMT_ID_11 | ||
PTD_SGMT_OCT_ID,PTD_SGMT_ID_10 | ||
PTD_SGMT_SEPT_ID,PTD_SGMT_ID_09 | ||
RDS_APR_IND,RDS_IND_04 | ||
RDS_AUG_IND,RDS_IND_08 | ||
RDS_DEC_IND,RDS_IND_12 | ||
RDS_FEB_IND,RDS_IND_02 | ||
RDS_JAN_IND,RDS_IND_01 | ||
RDS_JUL_IND,RDS_IND_07 | ||
RDS_JUN_IND,RDS_IND_06 | ||
RDS_MAR_IND,RDS_IND_03 | ||
RDS_MAY_IND,RDS_IND_05 | ||
RDS_MO_CNT,RDS_CVRG_MONS | ||
RDS_NOV_IND,RDS_IND_11 | ||
RDS_OCT_IND,RDS_IND_10 | ||
RDS_SEPT_IND,RDS_IND_09 | ||
RFRNC_YR,BENE_ENROLLMT_REF_YR | ||
RTI_RACE_CD,RTI_RACE_CD | ||
SAMPLE_GROUP,SAMPLE_GROUP | ||
STATE_CD, | ||
STATE_CNTY_ZIP_CD, | ||
STATE_CODE,STATE_CODE | ||
V_DOD_SW,VALID_DEATH_DT_SW |
Oops, something went wrong.