Skip to content

Commit

Permalink
Resolving parquet-tools memory error
Browse files (browse the repository at this point in the history)
  • Loading branch information
chandrashekar-s committed May 9, 2024
1 parent 1dea2be commit 88f34e9
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 11 deletions.
11 changes: 6 additions & 5 deletions e2e-tests/controller-spark/controller_spark_sql_validation.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -182,11 +182,12 @@ function check_parquet() {
# check whether output directory has started receiving parquet files.
if [[ "$(ls -A $output)" ]]
then
local total_patients=$(java -jar ./parquet-tools-1.11.1.jar rowcount "${output}/*/Patient/" | awk '{print $3}')
local total_encounters=$(java -jar ./parquet-tools-1.11.1.jar rowcount "${output}/*/Encounter/" \
| awk '{print $3}')
local total_observations=$(java -jar ./parquet-tools-1.11.1.jar rowcount "${output}/*/Observation/" \
| awk '{print $3}')
local total_patients=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
"${output}/*/Patient/" | awk '{print $3}')
local total_encounters=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
"${output}/*/Encounter/" | awk '{print $3}')
local total_observations=$(java -Xms16g -Xmx16g -jar ./parquet-tools-1.11.1.jar rowcount \
"${output}/*/Observation/" | awk '{print $3}')

print_message "Total patients: $total_patients"
print_message "Total encounters: $total_encounters"
Expand Down
14 changes: 8 additions & 6 deletions e2e-tests/pipeline_validation.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -196,16 +196,18 @@ function fhir_source_query() {
#################################################
function test_parquet_sink() {
print_message "Counting number of patients, encounters and obs sinked to parquet files"
local total_patients_streamed=$(java -jar ./controller-spark/parquet-tools-1.11.1.jar rowcount \
"${HOME_PATH}/${PARQUET_SUBDIR}/Patient/" | awk '{print $3}')
local total_patients_streamed=$(java -Xms16g -Xmx16g -jar \
./controller-spark/parquet-tools-1.11.1.jar rowcount "${HOME_PATH}/${PARQUET_SUBDIR}/Patient/" | \
awk '{print $3}')
print_message "Total patients synced to parquet ---> ${total_patients_streamed}"

local total_encounters_streamed=$(java -jar ./controller-spark/parquet-tools-1.11.1.jar rowcount \
"${HOME_PATH}/${PARQUET_SUBDIR}/Encounter/" | awk '{print $3}')
local total_encounters_streamed=$(java -Xms16g -Xmx16g -jar \
./controller-spark/parquet-tools-1.11.1.jar rowcount "${HOME_PATH}/${PARQUET_SUBDIR}/Encounter/" \
| awk '{print $3}')
print_message "Total encounters synced to parquet ---> ${total_encounters_streamed}"

local total_obs_streamed=$(java -jar ./controller-spark/parquet-tools-1.11.1.jar rowcount \
"${HOME_PATH}/${PARQUET_SUBDIR}/Observation/" | awk '{print $3}')
local total_obs_streamed=$(java -Xms16g -Xmx16g -jar ./controller-spark/parquet-tools-1.11.1.jar \
rowcount "${HOME_PATH}/${PARQUET_SUBDIR}/Observation/" | awk '{print $3}')
print_message "Total obs synced to parquet ---> ${total_obs_streamed}"

if [[ "${total_patients_streamed}" == "${TOTAL_TEST_PATIENTS}" && "${total_encounters_streamed}" \
Expand Down

0 comments on commit 88f34e9

Please sign in to comment.