Skip to content

Commit

Permalink
Rename HGNC file to indicate version 2023-10
Browse files Browse the repository at this point in the history
  • Loading branch information
leexgh committed Jan 4, 2024
1 parent b93992f commit 1cc20f4
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 7 deletions.
6 changes: 3 additions & 3 deletions data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ $(TMP_DIR)/ensembl_transcript_info.txt: $(TMP_DIR)/$(SPECIES).gff3.gz
python ../scripts/transform_gff_to_tsv_for_exon_info_from_ensembl.py $^ $@

# Add HGNC symbols, exons, UTRs, PFAM domains and Uniprot id to Ensembl Transcript
$(TMP_DIR)/ensembl_biomart_transcripts.json.gz: $(TMP_DIR)/ensembl_biomart_transcripts.txt $(TMP_DIR)/ensembl_transcript_info.txt $(VERSION)/input/ensembl_biomart_pfam.txt $(VERSION)/input/ensembl_biomart_refseq.txt $(VERSION)/input/ensembl_biomart_ccds.txt uniprot/export/$(VERSION)_enst_to_uniprot_mapping_id.txt common_input/isoform_overrides_uniprot.txt common_input/$(MSKCC_ISOFORM_OVERRIDES_FILE_NAME) common_input/hgnc_complete_set_2023-12-26.txt
$(TMP_DIR)/ensembl_biomart_transcripts.json.gz: $(TMP_DIR)/ensembl_biomart_transcripts.txt $(TMP_DIR)/ensembl_transcript_info.txt $(VERSION)/input/ensembl_biomart_pfam.txt $(VERSION)/input/ensembl_biomart_refseq.txt $(VERSION)/input/ensembl_biomart_ccds.txt uniprot/export/$(VERSION)_enst_to_uniprot_mapping_id.txt common_input/isoform_overrides_uniprot.txt common_input/$(MSKCC_ISOFORM_OVERRIDES_FILE_NAME) common_input/hgnc_complete_set_2023-10.txt
python ../scripts/add_domains_hugo_ccds_refseq_exon_info_uniprot_to_ensembl_transcript.py $^ $@

# For mouse, a specific recipe without overrides
Expand All @@ -165,9 +165,9 @@ $(TMP_DIR)/ensembl_biomart_transcripts_mouse.json.gz: $(TMP_DIR)/ensembl_biomart
# Give the default/canonical geneid/transcript based on the given Hugo symbol.
# Takes about 50m to run (TODO: this can be easily optimized)
# isoform_overrides_genome_nexus.txt is made for Genome Nexus; the other files are generated for vcf2maf
# Please note: we should keep hgnc_complete_set_2023-12-26 in sync with https://github.com/cBioPortal/datahub-study-curation-tools/blob/master/gene-table-update/build-input-for-importer/hgnc_complete_set.txt
# Please note: we should keep hgnc_complete_set_2023-10 in sync with https://github.com/cBioPortal/datahub-study-curation-tools/blob/master/gene-table-update/build-input-for-importer/hgnc_complete_set.txt
# isoform_overrides_oncokb_grch3*.txt is a list of OncoKB transcripts and genes, it's generated by download_oncokb_isoform_overrides.py
$(TMP_DIR)/ensembl_biomart_canonical_transcripts_per_hgnc.txt: $(TMP_DIR)/ensembl_canonical_data.txt common_input/hgnc_complete_set_2023-12-26.txt common_input/isoform_overrides_uniprot.txt common_input/$(MSKCC_ISOFORM_OVERRIDES_FILE_NAME) common_input/$(GENOME_NEXUS_ISOFORM_OVERRIDES_FILE_NAME) common_input/$(ONCOKB_ISOFORM_OVERRIDES_FILE_NAME) common_input/ignored_genes.txt
$(TMP_DIR)/ensembl_biomart_canonical_transcripts_per_hgnc.txt: $(TMP_DIR)/ensembl_canonical_data.txt common_input/hgnc_complete_set_2023-10.txt common_input/isoform_overrides_uniprot.txt common_input/$(MSKCC_ISOFORM_OVERRIDES_FILE_NAME) common_input/$(GENOME_NEXUS_ISOFORM_OVERRIDES_FILE_NAME) common_input/$(ONCOKB_ISOFORM_OVERRIDES_FILE_NAME) common_input/ignored_genes.txt
python ../scripts/make_one_canonical_transcript_per_gene.py $^ $@

# Mouse version. A different script is called that sets the canonicals based on Ensembl lookup.
Expand Down
4 changes: 2 additions & 2 deletions data/common_input/version_info.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name version type id genome_build description url
VEP grch37 mirrored vep grch37 VEP determines the effect of your variants(SNPs, insertions, deletions, CNVs or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. https://grch37.ensembl.org/info/docs/tools/vep/index.html
VEP grch38 mirrored vep grch38 VEP determines the effect of your variants(SNPs, insertions, deletions, CNVs or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. https://useast.ensembl.org/info/docs/tools/vep/index.html
HGNC 2023-12 mirrored hgnc grch37 The resource for approved human gene nomenclature. Genome Nexus uses HGNC gene symbols in annotation http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/
HGNC 2023-12 mirrored hgnc grch38 The resource for approved human gene nomenclature. Genome Nexus uses HGNC gene symbols in annotation http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/
HGNC 2023-10 mirrored hgnc grch37 The resource for approved human gene nomenclature. Genome Nexus uses HGNC gene symbols in annotation http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/
HGNC 2023-10 mirrored hgnc grch38 The resource for approved human gene nomenclature. Genome Nexus uses HGNC gene symbols in annotation http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/
Cancer Hotspots v2 mirrored cancer_hotspots grch37 A resource for statistically significant mutations in cancer https://www.cancerhotspots.org
Cancer Hotspots v2 mirrored cancer_hotspots grch38 A resource for statistically significant mutations in cancer https://www.cancerhotspots.org
3D Hotspots v2 mirrored 3d_hotspots grch37 A resource for statistically significant mutations clustering in 3d protein structures in cancer https://www.3dhotspots.org/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def main(ensembl_biomart_transcripts,
help="common_input/isoform_overrides_uniprot.txt")
parser.add_argument("vcf2maf_isoform_overrides_mskcc",
help="common_input/isoform_overrides_at_mskcc_grch37.txt or common_input/isoform_overrides_at_mskcc_grch38.txt")
parser.add_argument("hgnc_symbol_set", help="common_input/hgnc_complete_set_2023-12-26.txt")
parser.add_argument("hgnc_symbol_set", help="common_input/hgnc_complete_set_2023-10.txt")
parser.add_argument("ensembl_biomart_transcripts_json",
help="tmp/ensembl_biomart_transcripts.json.gz")

Expand Down
2 changes: 1 addition & 1 deletion scripts/make_one_canonical_transcript_per_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def main(ensembl_biomart_geneids_transcript_info,
parser.add_argument("ensembl_biomart_geneids_transcript_info",
help="tmp/ensembl_biomart_geneids.transcript_info.txt")
parser.add_argument("hgnc_complete_set",
help="common_input/hgnc_complete_set_2023-12-26.txt")
help="common_input/hgnc_complete_set_2023-10.txt")
parser.add_argument("isoform_overrides_uniprot",
help="common_input/isoform_overrides_uniprot.txt")
parser.add_argument("isoform_overrides_at_mskcc",
Expand Down

0 comments on commit 1cc20f4

Please sign in to comment.