diff --git a/.dockstore.yml b/.dockstore.yml
index a7c31ff7225..5c91300c877 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -128,6 +128,7 @@ workflows:
        branches:
          - master
          - ah_var_store
+         - rc-remove-sites-only-step
    - name: GvsValidateVat
      subclass: WDL
      primaryDescriptorPath: /scripts/variantstore/variant_annotations_table/GvsValidateVAT.wdl
diff --git a/scripts/variantstore/variant_annotations_table/custom_annotations_template.tsv b/scripts/variantstore/variant_annotations_table/custom_annotations_template.tsv
index 7a5ba70c823..a578ea9b65f 100644
--- a/scripts/variantstore/variant_annotations_table/custom_annotations_template.tsv
+++ b/scripts/variantstore/variant_annotations_table/custom_annotations_template.tsv
@@ -1,7 +1,7 @@
 #title=gvsAnnotations
 #assembly=GRCh38
 #matchVariantsBy=allele
-#CHROM	POS	REF	ALT	AC	AN	AF	AC_Hom	AC_Het	AC_afr	AN_afr	AF_afr	AC_Hom_afr	AC_Het_afr	AC_amr	AN_amr	AF_amr	AC_Hom_amr	AC_Het_amr	AC_eas	AN_eas	AF_eas	AC_Hom_eas	AC_Het_eas	AC_eur	AN_eur	AF_eur	AC_Hom_eur	AC_Het_eur	AC_mid	AN_mid	AF_mid	AC_Hom_mid	AC_Het_mid	AC_oth	AN_oth	AF_oth	AC_Hom_oth	AC_Het_oth	AC_sas	AN_sas	AF_sas	AC_Hom_sas	AC_Het_sas
-#categories	.	.	.	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount
-#descriptions	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
-#type	.	.	.	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number
+#CHROM	POS	REF	ALT	AC	AN	AF	AC_Hom	AC_Het	AC_Hemi	AC_afr	AN_afr	AF_afr	AC_Hom_afr	AC_Het_afr	AC_Hemi_afr	AC_amr	AN_amr	AF_amr	AC_Hom_amr	AC_Het_amr	AC_Hemi_amr	AC_eas	AN_eas	AF_eas	AC_Hom_eas	AC_Het_eas	AC_Hemi_eas	AC_eur	AN_eur	AF_eur	AC_Hom_eur	AC_Het_eur	AC_Hemi_eur	AC_mid	AN_mid	AF_mid	AC_Hom_mid	AC_Het_mid	AC_Hemi_mid	AC_oth	AN_oth	AF_oth	AC_Hom_oth	AC_Het_oth	AC_Hemi_oth	AC_sas	AN_sas	AF_sas	AC_Hom_sas	AC_Het_sas	AC_Hemi_sas
+#categories	.	.	.	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount	AlleleCount	AlleleNumber	AlleleFrequency	AlleleCount	AlleleCount	AlleleCount
+#descriptions	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
+#type	.	.	.	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number	number
diff --git a/scripts/variantstore/wdl/GvsSitesOnlyVCF.example.inputs.json b/scripts/variantstore/wdl/GvsSitesOnlyVCF.example.inputs.json
index 2061690b382..7e3f9f130b9 100644
--- a/scripts/variantstore/wdl/GvsSitesOnlyVCF.example.inputs.json
+++ b/scripts/variantstore/wdl/GvsSitesOnlyVCF.example.inputs.json
@@ -12,5 +12,6 @@
   "GvsSitesOnlyVCF.project_id": "spec-ops-aou",
   "GvsSitesOnlyVCF.dataset_name": "anvil_100_for_testing",
   "GvsSitesOnlyVCF.AnAcAf_annotations_template": "gs://broad-dsp-spec-ops/scratch/rcremer/Nirvana/custom_annotations_template.tsv",
-  "GvsSitesOnlyVCF.ancestry_file": "gs://broad-dsp-spec-ops/scratch/rcremer/Nirvana/subpop.tsv"
+  "GvsSitesOnlyVCF.ancestry_file": "gs://broad-dsp-spec-ops/scratch/rcremer/Nirvana/subpop.tsv",
+  "GvsSitesOnlyVCF.reference": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta"
 }
diff --git a/scripts/variantstore/wdl/GvsSitesOnlyVCF.wdl b/scripts/variantstore/wdl/GvsSitesOnlyVCF.wdl
index 2adccbb9da1..c4921f86684 100644
--- a/scripts/variantstore/wdl/GvsSitesOnlyVCF.wdl
+++ b/scripts/variantstore/wdl/GvsSitesOnlyVCF.wdl
@@ -3,8 +3,6 @@ workflow GvsSitesOnlyVCF {
    input {
         File inputFileofFileNames
         File inputFileofIndexFileNames
-        String output_sites_only_file_name
-        String output_annotated_file_name
         String project_id
         String dataset_name
         File nirvana_data_directory
@@ -34,6 +32,9 @@ workflow GvsSitesOnlyVCF {
 
     ## Scatter across the shards from the GVS jointVCF
     scatter(i in range(length(MakeSubpopulationFiles.input_vcfs)) ) {
+        ## Create a sites-only VCF from the original GVS jointVCF
+        ## Calculate AC/AN/AF for subpopulations and extract them for custom annotations
+        String input_vcf_name = basename(MakeSubpopulationFiles.input_vcfs[i], ".vcf.gz")
         call ExtractAnAcAfFromVCF {
             input:
               input_vcf = MakeSubpopulationFiles.input_vcfs[i],
@@ -44,21 +45,12 @@ workflow GvsSitesOnlyVCF {
               ref = reference
         }
 
-      ## Create a sites-only VCF from the original GVS jointVCF
-        call SitesOnlyVcf {
-          input:
-            input_vcf = ExtractAnAcAfFromVCF.output_vcf,
-            input_vcf_index = ExtractAnAcAfFromVCF.output_vcf_index,
-            service_account_json_path = service_account_json_path,
-            output_filename = "${output_sites_only_file_name}_${i}.sites_only.vcf.gz"
-        }
-
       ## Use Nirvana to annotate the sites-only VCF and include the AC/AN/AF calculations as custom annotations
         call AnnotateVCF {
           input:
-            input_vcf = SitesOnlyVcf.output_vcf,
-            input_vcf_index = SitesOnlyVcf.output_vcf_idx,
-            output_annotated_file_name = "${output_annotated_file_name}_${i}",
+            input_vcf = ExtractAnAcAfFromVCF.output_vcf,
+            input_vcf_index = ExtractAnAcAfFromVCF.output_vcf_index,
+            output_annotated_file_name = "${input_vcf_name}_annotated",
             nirvana_data_tar = nirvana_data_directory,
             custom_annotations_file = ExtractAnAcAfFromVCF.annotations_file,
         }
@@ -66,7 +58,7 @@ workflow GvsSitesOnlyVCF {
         call PrepAnnotationJson {
           input:
             annotation_json = AnnotateVCF.annotation_json,
-            output_file_suffix = "${i}.json.gz",
+            output_file_suffix = "${input_vcf_name}.json.gz",
             output_path = output_path,
             service_account_json_path = service_account_json_path
         }
@@ -90,12 +82,13 @@ workflow GvsSitesOnlyVCF {
          project_id = project_id,
          dataset_name = dataset_name,
          counts_variants = ExtractAnAcAfFromVCF.count_variants,
+         track_dropped_variants = ExtractAnAcAfFromVCF.track_dropped,
          table_suffix = table_suffix,
          service_account_json_path = service_account_json_path,
          load_jsons_done = BigQueryLoadJson.done
     }
-    scatter(i in range(length(contig_array)) ) {
 
+    scatter(i in range(length(contig_array)) ) {
       call BigQueryExportVat {
         input:
             contig = contig_array[i],
@@ -158,7 +151,7 @@ task MakeSubpopulationFiles {
     # ------------------------------------------------
     # Runtime settings:
     runtime {
-        docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_20210916"
+        docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_20211007"
         memory: "1 GB"
         preemptible: 3
         cpu: "1"
@@ -200,7 +193,8 @@ task ExtractAnAcAfFromVCF {
     String normalized_vcf_compressed = "normalized.vcf.gz"
     String normalized_vcf_indexed = "normalized.vcf.gz.tbi"
 
-    # separate multi-allelic sites into their own lines, remove deletions and extract the an/ac/af & sc
+    # separate multi-allelic sites into their own lines, remove deletions and filtered sites and make a sites only vcf
+    # while extracting the an/ac/af & sc by subpopulation into a tsv
     command <<<
         set -e
 
@@ -217,8 +211,12 @@ task ExtractAnAcAfFromVCF {
         gsutil cp ~{input_vcf_index} ~{local_input_vcf_index}
         gsutil cp ~{ref} Homo_sapiens_assembly38.fasta
 
-        # TODO Compare the ancestry list with the sample list and throw an error (but dont fail the job) if there are samples that are in one, but not the other. Two different errors.
-        # awk '{print $2}' ~{subpopulation_sample_list} | tail -n +2 | sort -u > collected_subpopulations.txt
+        ## compare the ancestry sample list with the vcf sample list
+        # TODO throw an error (but dont fail the job) if there are samples that are in one, but not the other. Throw two different errors.
+        # Currently commented out to save on io with the AoU beta VAT creation
+        # awk '{print $1}' ~{subpopulation_sample_list} | tail -n +2 | sort -u > collected_subpopulation_samples.txt
+        # bcftools query --list-samples ~{local_input_vcf} | sort -u > collected_samples.txt
+        # diff collected_subpopulation_samples.txt collected_samples.txt
 
         # expected_subpopulations = [
         # "afr",
@@ -230,39 +228,56 @@ task ExtractAnAcAfFromVCF {
         # "sas"
         #]
 
-
-        bcftools norm -m-any ~{local_input_vcf} | \
-        bcftools norm --check-ref w  -f Homo_sapiens_assembly38.fasta > ~{normalized_vcf}
-
-
-        ## make a file of just the first 4 columns of the tsv (maybe I could just bcftools query it?)
-        bcftools query ~{normalized_vcf} -f '%CHROM\t%POS\t%REF\t%ALT\n' > check_duplicates.tsv
+        ## track the dropped variants with +500 alt alleles or N's in the reference (Since Nirvana cant handle N as a base, drop them for now)
+        bcftools view -i 'N_ALT>500 || REF~"N"' ~{local_input_vcf} | bcftools query -f '%CHROM\t%POS\t%ID\t%REF\t%ALT\n' > track_dropped.tsv
+
+        ## filter out sites with too many alt alleles
+        bcftools view -e 'N_ALT>500 || REF~"N"' --no-update  ~{local_input_vcf} | \
+        ## filter out the non-passing sites
+        bcftools view  -f 'PASS,.' --no-update | \
+        ## normalize, left align and split multi allelic sites to new lines, remove duplicate lines
+        bcftools norm -m- --check-ref w -f Homo_sapiens_assembly38.fasta | \
+        ## filter out spanning deletions and variants with an AC of 0
+        bcftools view  -e 'ALT[0]="*" || AC=0' --no-update | \
+        ## ensure that we respect the FT tag
+        bcftools filter -i "FORMAT/FT='PASS,.'" --set-GTs . > ~{normalized_vcf}
+
+        ## clean up unneeded file
+        rm ~{local_input_vcf}
+
+        ## make a file of just the first 5 columns of the tsv
+        bcftools query ~{normalized_vcf} -f '%CHROM\t%POS\t%ID\t%REF\t%ALT\n' > check_duplicates.tsv
         ## check it for duplicates and put them in a new file
-        sort check_duplicates.tsv | uniq -d | cut -f1,2  > duplicates.tsv
-        ## remove those rows (this will be ALL rows with this position--so good rows too, potentially we want to grab f1,4 to do this with)
-        grep -v -wFf duplicates.tsv ~{normalized_vcf} | grep -v "AC=0;"  > deduplicated.vcf
-
-        wc -l duplicates.tsv
-
-        bcftools plugin fill-tags  -- deduplicated.vcf -S ~{subpopulation_sample_list} -t AC,AF,AN,AC_het,AC_hom | bcftools query -f \
-        '%CHROM\t%POS\t%REF\t%ALT\t%AC\t%AN\t%AF\t%AC_Hom\t%AC_Het\t%AC_afr\t%AN_afr\t%AF_afr\t%AC_Hom_afr\t%AC_Het_afr\t%AC_amr\t%AN_amr\t%AF_amr\t%AC_Hom_amr\t%AC_Het_amr\t%AC_eas\t%AN_eas\t%AF_eas\t%AC_Hom_eas\t%AC_Het_eas\t%AC_eur\t%AN_eur\t%AF_eur\t%AC_Hom_eur\t%AC_Het_eur\t%AC_mid\t%AN_mid\t%AF_mid\t%AC_Hom_mid\t%AC_Het_mid\t%AC_oth\t%AN_oth\t%AF_oth\t%AC_Hom_oth\t%AC_Het_oth\t%AC_sas\t%AN_sas\t%AF_sas\t%AC_Hom_sas\t%AC_Het_sas\n' \
-        | grep -v "*" >> ~{custom_annotations_file_name}
-
-        ### for validation of the pipeline
-        wc -l ~{custom_annotations_file_name} | awk '{print $1}'  > count.txt
-        # Should this be where we do the filtering of the AC/AN/AF values rather than in the python?
-
-        ### compress the vcf and index it
-        bcftools view -I deduplicated.vcf -Oz -o ~{normalized_vcf_compressed}
+        sort check_duplicates.tsv | uniq -d | cut -f1,2,3,4,5  > duplicates.tsv
+        rm check_duplicates.tsv ## clean up
+        ## remove those rows (that match up to the first 5 cols)
+        grep -v -wFf duplicates.tsv ~{normalized_vcf} > deduplicated.vcf
+        rm ~{normalized_vcf} ## clean up
+
+        ## add duplicates to the file tracking dropped variants
+        cat duplicates.tsv >> track_dropped.tsv
+        rm duplicates.tsv ## clean up unneeded file
+
+        ## calculate annotations for all subpopulations
+        bcftools plugin fill-tags  -- deduplicated.vcf -S ~{subpopulation_sample_list} -t AC,AF,AN,AC_het,AC_hom,AC_Hemi | bcftools query -f \
+        '%CHROM\t%POS\t%REF\t%ALT\t%AC\t%AN\t%AF\t%AC_Hom\t%AC_Het\t%AC_Hemi\t%AC_afr\t%AN_afr\t%AF_afr\t%AC_Hom_afr\t%AC_Het_afr\t%AC_Hemi_afr\t%AC_amr\t%AN_amr\t%AF_amr\t%AC_Hom_amr\t%AC_Het_amr\t%AC_Hemi_amr\t%AC_eas\t%AN_eas\t%AF_eas\t%AC_Hom_eas\t%AC_Het_eas\t%AC_Hemi_eas\t%AC_eur\t%AN_eur\t%AF_eur\t%AC_Hom_eur\t%AC_Het_eur\t%AC_Hemi_eur\t%AC_mid\t%AN_mid\t%AF_mid\t%AC_Hom_mid\t%AC_Het_mid\t%AC_Hemi_mid\t%AC_oth\t%AN_oth\t%AF_oth\t%AC_Hom_oth\t%AC_Het_oth\t%AC_Hemi_oth\t%AC_sas\t%AN_sas\t%AF_sas\t%AC_Hom_sas\t%AC_Het_sas\t%AC_Hemi_sas\n' \
+        >> ~{custom_annotations_file_name}
+
+        ## for validation of the pipeline
+        wc -l ~{custom_annotations_file_name} | awk '{print $1 -7}'  > count.txt
+
+        ## compress the vcf and index it, make it sites-only
+        bcftools view --no-update --drop-genotypes deduplicated.vcf -Oz -o ~{normalized_vcf_compressed}
+        ## if we can spare the IO and want to pass a smaller file we can also drop the info field w bcftools annotate -x INFO
         bcftools index --tbi  ~{normalized_vcf_compressed}
 
     >>>
     # ------------------------------------------------
     # Runtime settings:
     runtime {
-        docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_20210916"
-        memory: "20 GB"
-        preemptible: 1
+        docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_20211007"
+        memory: "32 GB"
+        preemptible: 3
         cpu: "2"
         disks: "local-disk 500 SSD"
     }
@@ -271,52 +286,12 @@ task ExtractAnAcAfFromVCF {
     output {
         File annotations_file = "~{custom_annotations_file_name}"
         Int count_variants = read_int("count.txt")
+        File track_dropped = "track_dropped.tsv"
         File output_vcf = "~{normalized_vcf_compressed}"
         File output_vcf_index = "~{normalized_vcf_indexed}"
     }
 }
 
-task SitesOnlyVcf {
-    input {
-        File input_vcf
-        File input_vcf_index
-        String? service_account_json_path
-        String output_filename
-    }
-    String output_vcf_idx = basename(output_filename) + ".tbi"
-
-    command <<<
-        set -e
-
-        # Adding `--add-output-vcf-command-line false` so that the VCF header doesn't have a timestamp
-        # in it so that downstream steps can call cache
-
-        gatk --java-options "-Xmx12288m" \
-            SelectVariants \
-                -V ~{input_vcf} \
-                --add-output-vcf-command-line false \
-                --exclude-filtered \
-                --sites-only-vcf-output \
-                -O ~{output_filename}
-
-     >>>
-    # ------------------------------------------------
-    # Runtime settings:
-    runtime {
-        docker: "broadinstitute/gatk:4.2.0.0"
-        memory: "15 GB"
-        preemptible: 3
-        cpu: "2"
-        disks: "local-disk 250 HDD"
-    }
-    # ------------------------------------------------
-    # Outputs:
-    output {
-        File output_vcf="~{output_filename}"
-        File output_vcf_idx="~{output_vcf_idx}"
-    }
-}
-
 task AnnotateVCF {
     input {
         File input_vcf
@@ -375,10 +350,10 @@ task AnnotateVCF {
     # Runtime settings:
     runtime {
         docker: "annotation/nirvana:3.14"
-        memory: "20 GB"
+        memory: "32 GB"
         cpu: "2"
         preemptible: 5
-        disks: "local-disk 250 SSD"
+        disks: "local-disk 500 SSD"
     }
     # ------------------------------------------------
     # Outputs:
@@ -430,7 +405,7 @@ task PrepAnnotationJson {
     # ------------------------------------------------
     # Runtime settings:
     runtime {
-        docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_20210916"
+        docker: "us.gcr.io/broad-dsde-methods/variantstore:ah_var_store_20211007"
         memory: "8 GB"
         preemptible: 5
         cpu: "1"
@@ -668,6 +643,7 @@ task BigQuerySmokeTest {
         String project_id
         String dataset_name
         Array[Int] counts_variants
+        Array[File] track_dropped_variants
         String table_suffix
         String? service_account_json_path
         Boolean load_jsons_done
@@ -695,6 +671,12 @@ task BigQuerySmokeTest {
         # sum all the initial input variants across the shards
 
         INITIAL_VARIANT_COUNT=$(python -c "print(sum([~{sep=', ' counts_variants}]))")
+        ## here a list of files: ~{sep=', ' track_dropped_variants}
+        ## I need to get the lines from each file
+        awk 'FNR==1{print ""}1' ~{sep=' ' track_dropped_variants} > dropped_variants.txt
+
+        echo "Dropped variants:"
+        cat dropped_variants.txt
 
         # Count number of variants in the VAT
         bq query --nouse_legacy_sql --project_id=~{project_id} --format=csv 'SELECT COUNT (DISTINCT vid) AS count FROM `~{dataset_name}.~{vat_table}`' > bq_variant_count.csv
diff --git a/scripts/variantstore/wdl/extract/create_variant_annotation_table.py b/scripts/variantstore/wdl/extract/create_variant_annotation_table.py
index 0097fa18b3d..e305441a954 100644
--- a/scripts/variantstore/wdl/extract/create_variant_annotation_table.py
+++ b/scripts/variantstore/wdl/extract/create_variant_annotation_table.py
@@ -11,13 +11,15 @@
 
 vat_nirvana_positions_dictionary = {
   "position": "position", # required
+  "ref_allele": "refAllele", # required
+  "alt_allele": "altAlleles", # required
 }
 
 vat_nirvana_variants_dictionary = {
   "vid": "vid", # required
   "contig": "chromosome", # required
-  "ref_allele": "refAllele", # required
-  "alt_allele": "altAllele", # required
+  # "ref_allele": "refAllele", # required # do we want to grab both to compare?
+  # "alt_allele": "altAllele", # required
   "variant_type": "variantType", # required
   "genomic_location": "hgvsg", # required
   "dbsnp_rsid": "dbsnp",  # nullable
@@ -118,6 +120,10 @@ def check_filtering(variant):
     if variant.get("gvsAnnotations") == None: # <-- enum since we need this to be in tandem with the custom annotations header / template
       print("WARNING: There has been an error in creating custom annotations for AC/AF/AN", variant.get("vid"))
       return False
+    # skip any row (with a warning) if the AC value is 0
+    elif variant["gvsAnnotations"].get("AC") == 0:
+      print("WARNING: Its AC is 0 so we are dropping this variant", variant.get("vid"))
+      return False
     # skip any row (with a warning) if AC, AN or AF is missing
     elif variant["gvsAnnotations"].get("AC") == None:
       print("WARNING: There has been an error-- there is no AC value---should AN be 0 for this variant?", variant.get("vid"))
@@ -128,10 +134,6 @@ def check_filtering(variant):
     elif variant["gvsAnnotations"].get("AF") == None:
       print("WARNING: There has been an error-- there is an AC value---but no AF value", variant.get("vid"))
       return False
-    # skip any row (with a warning) if the AC value is 0
-    elif variant["gvsAnnotations"].get("AC") == 0:
-      print("WARNING: Its AC is 0 so we are dropping this variant", variant.get("vid"))
-      return False
     else:
       return True
 
@@ -176,7 +178,7 @@ def get_subpopulation_calculations(subpop_annotations):
       subpop_an_val = subpop_annotations.get("_".join(["AN", gvs_subpop]))
       subpop_af_val = subpop_annotations.get("_".join(["AF", gvs_subpop]))
        # note the assumption is made that AC_Hom must be even because by it's nature it means there are two, but there could be an error
-      subpop_sc_val = int(subpop_annotations.get("_".join(["AC_Hom", gvs_subpop])) / 2 ) + subpop_annotations.get("_".join(["AC_Het", gvs_subpop]))
+      subpop_sc_val = int(subpop_annotations.get("_".join(["AC_Hom", gvs_subpop])) / 2 ) + subpop_annotations.get("_".join(["AC_Het", gvs_subpop])) + subpop_annotations.get("_".join(["AC_Hemi", gvs_subpop]))
       # here we set the subpopulation ac/an/af values
       row["_".join(["gvs", gvs_subpop, "ac"])] = subpop_ac_val
       row["_".join(["gvs", gvs_subpop, "an"])] = subpop_an_val
@@ -196,9 +198,11 @@ def get_subpopulation_calculations(subpop_annotations):
     row["gvs_max_sc"] = max_sc
     return row
 
-def make_annotated_json_row(row_position, variant_line, transcript_line):
+def make_annotated_json_row(row_position, row_ref, row_alt, variant_line, transcript_line):
     row = {}
     row["position"] = row_position # this is a required field
+    row["ref_allele"] = row_ref # this is a required field
+    row["alt_allele"] = row_alt # this is a required field
 
     for vat_variants_fieldname in vat_nirvana_variants_dictionary.keys():  # like "contig"
       nirvana_variants_fieldname = vat_nirvana_variants_dictionary.get(vat_variants_fieldname)
@@ -287,17 +291,23 @@ def make_positions_json(annotated_json, output_json):
 
   for p in positions:
     position=p['position']
+    refAllele=p['refAllele'] # ref_allele
+    altAlleles=p['altAlleles'] # this is an array that we need to split into each alt_allele
     variants=p['variants']
+    if '*' in altAlleles:
+      altAlleles.remove('*')
+    ## there should be the same number of altAlleles as variants, right?
+    assert len(altAlleles)==len(variants)
     # row for each variant - transcript
     # so let's start with each variant
-    for variant in variants:
+    for index, variant in enumerate(variants):
       # check if it's a row we care about
       if check_filtering(variant) is False:
         continue
       # remember that we want one for each variant-transcript and variant-null for variants without transcripts
       if variant.get("transcripts") == None:
         # then we make a special row
-        row = make_annotated_json_row(position, variant, None)
+        row = make_annotated_json_row(position, refAllele, altAlleles[index], variant, None)
         json_str = json.dumps(row) + "\n"
         json_bytes = json_str.encode('utf-8')
         output_file.write(json_bytes)
@@ -308,13 +318,13 @@ def make_positions_json(annotated_json, output_json):
         if "Ensembl" in sources:
           for transcript in transcript_lines:
             if transcript.get('source') == "Ensembl":
-              row = make_annotated_json_row(position, variant, transcript)
+              row = make_annotated_json_row(position, refAllele, altAlleles[index], variant, transcript)
               json_str = json.dumps(row) + "\n"
               json_bytes = json_str.encode('utf-8')
               output_file.write(json_bytes)
         else:
           # if there are transcripts, but they are not Ensembl, we now only want one row in the VAT, not one row per transcript
-          row = make_annotated_json_row(position, variant, None)
+          row = make_annotated_json_row(position, refAllele, altAlleles[index], variant, None)
           json_str = json.dumps(row) + "\n"
           json_bytes = json_str.encode('utf-8')
           output_file.write(json_bytes)