diff --git a/src/pyobo/resources/so.py b/src/pyobo/resources/so.py
new file mode 100644
index 00000000..f1725952
--- /dev/null
+++ b/src/pyobo/resources/so.py
@@ -0,0 +1,79 @@
+"""Loading of the Sequence Ontology names."""
+
+from __future__ import annotations
+
+import csv
+import os
+from functools import lru_cache
+
+import requests
+
+__all__ = [
+    "get_so_name",
+    "load_so",
+]
+
+HERE = os.path.abspath(os.path.dirname(__file__))
+SO_PATH = os.path.join(HERE, "so.tsv")
+SO_JSON_URL = "https://github.com/The-Sequence-Ontology/SO-Ontologies/raw/refs/heads/master/Ontology_Files/so-simple.json"
+SO_URI_PREFIX = "http://purl.obolibrary.org/obo/SO_"
+#: Seconds to wait for the upstream server before giving up on the download.
+DOWNLOAD_TIMEOUT = 60
+
+
+def get_so_name(so_id: str) -> str | None:
+    """Get the name from the identifier.
+
+    :param so_id: The local unique identifier of a Sequence Ontology term,
+        i.e., the part of the URI after ``SO_`` (e.g., ``0000704``).
+    :returns: The name of the term, or ``None`` if the identifier is unknown.
+    """
+    return load_so().get(so_id)
+
+
+@lru_cache(maxsize=1)
+def load_so() -> dict[str, str]:
+    """Load the Sequence Ontology names.
+
+    The names are downloaded first if the TSV file at ``SO_PATH`` does not
+    exist yet. The parsed mapping is cached after the first call.
+
+    :returns: A dictionary from local unique identifiers to names.
+    """
+    if not os.path.exists(SO_PATH):
+        download_so()
+    # newline="" is what the csv module expects; it handles line endings itself
+    with open(SO_PATH, newline="", encoding="utf-8") as file:
+        return dict(csv.reader(file, delimiter="\t"))
+
+
+def download_so() -> None:
+    """Download the latest version of the Sequence Ontology.
+
+    Nodes whose URI is outside the SO namespace and nodes without a label are
+    skipped. The remaining (identifier, name) pairs are written to ``SO_PATH``
+    as a TSV file sorted by the numeric value of the identifier.
+
+    :raises requests.HTTPError: If the server answers with an error status.
+    """
+    res = requests.get(SO_JSON_URL, timeout=DOWNLOAD_TIMEOUT)
+    # fail on an HTTP error instead of trying to parse an error page as JSON
+    res.raise_for_status()
+    rows = []
+    for node in res.json()["graphs"][0]["nodes"]:
+        uri = node["id"]
+        if not uri.startswith(SO_URI_PREFIX):
+            continue
+        identifier = uri.removeprefix(SO_URI_PREFIX)
+        name = node.get("lbl")
+        if name:
+            rows.append((identifier, name))
+
+    # newline="" keeps the csv writer from producing \r\r\n on Windows
+    with open(SO_PATH, "w", newline="", encoding="utf-8") as file:
+        writer = csv.writer(file, delimiter="\t")
+        writer.writerows(sorted(rows, key=lambda row: int(row[0])))
+
+
+if __name__ == "__main__":
+    download_so()
diff --git a/src/pyobo/resources/so.tsv b/src/pyobo/resources/so.tsv new file mode 100644 index 00000000..a4f87673 --- /dev/null +++ b/src/pyobo/resources/so.tsv @@ -0,0 +1,2604 @@ +0000000 Sequence_Ontology +0000001 region +0000002 sequence_secondary_structure +0000003 G_quartet +0000004 interior_coding_exon +0000005 satellite_DNA +0000006 PCR_product +0000007 read_pair +0000008 gene_sensu_your_favorite_organism +0000009
gene_class +0000010 protein_coding +0000011 non_protein_coding +0000012 scRNA_primary_transcript +0000013 scRNA +0000014 INR_motif +0000015 DPE_motif +0000016 BREu_motif +0000017 PSE_motif +0000018 linkage_group +0000020 RNA_internal_loop +0000021 asymmetric_RNA_internal_loop +0000022 A_minor_RNA_motif +0000023 K_turn_RNA_motif +0000024 sarcin_like_RNA_motif +0000025 symmetric_RNA_internal_loop +0000026 RNA_junction_loop +0000027 RNA_hook_turn +0000028 base_pair +0000029 WC_base_pair +0000030 sugar_edge_base_pair +0000031 aptamer +0000032 DNA_aptamer +0000033 RNA_aptamer +0000034 morpholino_oligo +0000035 riboswitch +0000036 matrix_attachment_site +0000037 locus_control_region +0000038 match_set +0000039 match_part +0000040 genomic_clone +0000041 sequence_operation +0000042 pseudogene_attribute +0000043 processed_pseudogene +0000044 pseudogene_by_unequal_crossing_over +0000045 delete +0000046 insert +0000047 invert +0000048 substitute +0000049 translocate +0000050 gene_part +0000051 probe +0000052 assortment_derived_deficiency +0000053 sequence_variant_affecting_regulatory_region +0000054 aneuploid +0000055 hyperploid +0000056 hypoploid +0000057 operator +0000058 assortment_derived_aneuploid +0000059 nuclease_binding_site +0000060 compound_chromosome_arm +0000061 restriction_enzyme_binding_site +0000062 deficient_intrachromosomal_transposition +0000063 deficient_interchromosomal_transposition +0000064 gene_by_transcript_attribute +0000065 free_chromosome_arm +0000066 gene_by_polyadenylation_attribute +0000067 gene_to_gene_feature +0000068 overlapping +0000069 inside_intron +0000070 inside_intron_antiparallel +0000071 inside_intron_parallel +0000072 end_overlapping_gene +0000073 five_prime_three_prime_overlap +0000074 five_prime_five_prime_overlap +0000075 three_prime_three_prime_overlap +0000076 three_prime_five_prime_overlap +0000077 antisense +0000078 polycistronic_transcript +0000079 dicistronic_transcript +0000080 operon_member +0000081 gene_array_member 
+0000082 processed_transcript_attribute +0000083 macronuclear_sequence +0000084 micronuclear_sequence +0000085 gene_by_genome_location +0000086 gene_by_organelle_of_genome +0000087 nuclear_gene +0000088 mt_gene +0000089 kinetoplast_gene +0000090 plastid_gene +0000091 apicoplast_gene +0000092 ct_gene +0000093 chromoplast_gene +0000094 cyanelle_gene +0000095 leucoplast_gene +0000096 proplastid_gene +0000097 nucleomorph_gene +0000098 plasmid_gene +0000099 proviral_gene +0000100 endogenous_retroviral_gene +0000101 transposable_element +0000102 expressed_sequence_match +0000103 clone_insert_end +0000104 polypeptide +0000105 chromosome_arm +0000106 non_capped_primary_transcript +0000107 sequencing_primer +0000108 mRNA_with_frameshift +0000109 sequence_variant_obs +0000110 sequence_feature +0000111 transposable_element_gene +0000112 primer +0000113 proviral_region +0000114 methylated_cytosine +0000115 transcript_feature +0000116 edited +0000117 transcript_with_readthrough_stop_codon +0000118 transcript_with_translational_frameshift +0000119 regulated +0000120 protein_coding_primary_transcript +0000121 forward_primer +0000122 RNA_sequence_secondary_structure +0000123 transcriptionally_regulated +0000124 transcriptionally_constitutive +0000125 transcriptionally_induced +0000126 transcriptionally_repressed +0000127 silenced_gene +0000128 gene_silenced_by_DNA_modification +0000129 gene_silenced_by_DNA_methylation +0000130 post_translationally_regulated +0000131 translationally_regulated +0000132 reverse_primer +0000133 epigenetically_modified +0000134 genomically_imprinted +0000135 maternally_imprinted +0000136 paternally_imprinted +0000137 allelically_excluded +0000138 gene_rearranged_at_DNA_level +0000139 ribosome_entry_site +0000140 attenuator +0000141 terminator +0000142 DNA_sequence_secondary_structure +0000143 assembly_component +0000144 primary_transcript_attribute +0000145 recoded_codon +0000146 capped +0000147 exon +0000148 supercontig +0000149 contig +0000150 read 
+0000151 clone +0000152 YAC +0000153 BAC +0000154 PAC +0000155 plasmid +0000156 cosmid +0000157 phagemid +0000158 fosmid +0000159 deletion +0000160 lambda_clone +0000161 methylated_adenine +0000162 splice_site +0000163 five_prime_cis_splice_site +0000164 three_prime_cis_splice_site +0000165 enhancer +0000166 enhancer_bound_by_factor +0000167 promoter +0000168 restriction_enzyme_cut_site +0000169 RNApol_I_promoter +0000170 RNApol_II_promoter +0000171 RNApol_III_promoter +0000172 CAAT_signal +0000173 GC_rich_promoter_region +0000174 TATA_box +0000175 minus_10_signal +0000176 minus_35_signal +0000177 cross_genome_match +0000178 operon +0000179 clone_insert_start +0000180 retrotransposon +0000181 translated_nucleotide_match +0000182 DNA_transposon +0000183 non_transcribed_region +0000184 U2_intron +0000185 primary_transcript +0000186 LTR_retrotransposon +0000187 repeat_family +0000188 intron +0000189 non_LTR_retrotransposon +0000190 five_prime_intron +0000191 interior_intron +0000192 three_prime_intron +0000193 RFLP_fragment +0000194 LINE_element +0000195 coding_exon +0000196 five_prime_coding_exon_coding_region +0000197 three_prime_coding_exon_coding_region +0000198 noncoding_exon +0000199 translocation +0000200 five_prime_coding_exon +0000201 interior_exon +0000202 three_prime_coding_exon +0000203 UTR +0000204 five_prime_UTR +0000205 three_prime_UTR +0000206 SINE_element +0000207 simple_sequence_length_variation +0000208 terminal_inverted_repeat_element +0000209 rRNA_primary_transcript +0000210 tRNA_primary_transcript +0000211 alanine_tRNA_primary_transcript +0000212 arginine_tRNA_primary_transcript +0000213 asparagine_tRNA_primary_transcript +0000214 aspartic_acid_tRNA_primary_transcript +0000215 cysteine_tRNA_primary_transcript +0000216 glutamic_acid_tRNA_primary_transcript +0000217 glutamine_tRNA_primary_transcript +0000218 glycine_tRNA_primary_transcript +0000219 histidine_tRNA_primary_transcript +0000220 isoleucine_tRNA_primary_transcript +0000221 
leucine_tRNA_primary_transcript +0000222 lysine_tRNA_primary_transcript +0000223 methionine_tRNA_primary_transcript +0000224 phenylalanine_tRNA_primary_transcript +0000225 proline_tRNA_primary_transcript +0000226 serine_tRNA_primary_transcript +0000227 threonine_tRNA_primary_transcript +0000228 tryptophan_tRNA_primary_transcript +0000229 tyrosine_tRNA_primary_transcript +0000230 valine_tRNA_primary_transcript +0000231 snRNA_primary_transcript +0000232 snoRNA_primary_transcript +0000233 mature_transcript +0000234 mRNA +0000235 TF_binding_site +0000236 ORF +0000237 transcript_attribute +0000238 foldback_element +0000239 flanking_region +0000240 chromosome_variation +0000241 internal_UTR +0000242 untranslated_region_polycistronic_mRNA +0000243 internal_ribosome_entry_site +0000244 four_cutter_restriction_site +0000245 mRNA_by_polyadenylation_status +0000246 polyadenylated +0000247 mRNA_not_polyadenylated +0000248 sequence_length_alteration +0000249 six_cutter_restriction_site +0000250 modified_RNA_base_feature +0000251 eight_cutter_restriction_site +0000252 rRNA +0000253 tRNA +0000254 alanyl_tRNA +0000255 rRNA_small_subunit_primary_transcript +0000256 asparaginyl_tRNA +0000257 aspartyl_tRNA +0000258 cysteinyl_tRNA +0000259 glutaminyl_tRNA +0000260 glutamyl_tRNA +0000261 glycyl_tRNA +0000262 histidyl_tRNA +0000263 isoleucyl_tRNA +0000264 leucyl_tRNA +0000265 lysyl_tRNA +0000266 methionyl_tRNA +0000267 phenylalanyl_tRNA +0000268 prolyl_tRNA +0000269 seryl_tRNA +0000270 threonyl_tRNA +0000271 tryptophanyl_tRNA +0000272 tyrosyl_tRNA +0000273 valyl_tRNA +0000274 snRNA +0000275 snoRNA +0000276 miRNA +0000277 bound_by_factor +0000278 transcript_bound_by_nucleic_acid +0000279 transcript_bound_by_protein +0000280 engineered_gene +0000281 engineered_foreign_gene +0000282 mRNA_with_minus_1_frameshift +0000283 engineered_foreign_transposable_element_gene +0000284 type_I_enzyme_restriction_site +0000285 foreign_gene +0000286 long_terminal_repeat +0000287 fusion_gene +0000288 
engineered_fusion_gene +0000289 microsatellite +0000290 dinucleotide_repeat_microsatellite_feature +0000291 trinucleotide_repeat_microsatellite_feature +0000292 repetitive_element +0000293 engineered_foreign_repetitive_element +0000294 inverted_repeat +0000295 U12_intron +0000296 origin_of_replication +0000297 D_loop +0000298 recombination_feature +0000299 specific_recombination_site +0000300 recombination_feature_of_rearranged_gene +0000301 vertebrate_immune_system_gene_recombination_feature +0000302 J_gene_recombination_feature +0000303 clip +0000304 type_II_enzyme_restriction_site +0000305 modified_DNA_base +0000306 methylated_DNA_base_feature +0000307 CpG_island +0000308 sequence_feature_locating_method +0000309 computed_feature +0000310 predicted_ab_initio_computation +0000311 computed_feature_by_similarity +0000312 experimentally_determined +0000313 stem_loop +0000314 direct_repeat +0000315 TSS +0000316 CDS +0000317 cDNA_clone +0000318 start_codon +0000319 stop_codon +0000320 intronic_splice_enhancer +0000321 mRNA_with_plus_1_frameshift +0000322 nuclease_hypersensitive_site +0000323 coding_start +0000324 tag +0000325 rRNA_large_subunit_primary_transcript +0000326 SAGE_tag +0000327 coding_end +0000328 microarray_oligo +0000329 mRNA_with_plus_2_frameshift +0000330 conserved_region +0000331 STS +0000332 coding_conserved_region +0000333 exon_junction +0000334 nc_conserved_region +0000335 mRNA_with_minus_2_frameshift +0000336 pseudogene +0000337 RNAi_reagent +0000338 MITE +0000339 recombination_hotspot +0000340 chromosome +0000341 chromosome_band +0000342 site_specific_recombination_target_region +0000343 match +0000344 splice_enhancer +0000345 EST +0000346 loxP_site +0000347 nucleotide_match +0000348 nucleic_acid +0000349 protein_match +0000350 FRT_site +0000351 synthetic_sequence +0000352 DNA +0000353 sequence_assembly +0000354 group_1_intron_homing_endonuclease_target_region +0000355 haplotype_block +0000356 RNA +0000357 flanked +0000359 floxed +0000360 codon 
+0000361 FRT_flanked +0000362 invalidated_by_chimeric_cDNA +0000363 floxed_gene +0000364 transposable_element_flanking_region +0000365 integron +0000366 insertion_site +0000367 attI_site +0000368 transposable_element_insertion_site +0000369 integrase_coding_region +0000370 small_regulatory_ncRNA +0000371 conjugative_transposon +0000372 enzymatic_RNA +0000373 recombinationally_inverted_gene +0000374 ribozyme +0000375 cytosolic_5_8S_rRNA +0000376 RNA_6S +0000377 CsrB_RsmB_RNA +0000378 DsrA_RNA +0000379 GcvB_RNA +0000380 hammerhead_ribozyme +0000381 group_IIA_intron +0000382 group_IIB_intron +0000383 MicF_RNA +0000384 OxyS_RNA +0000385 RNase_MRP_RNA +0000386 RNase_P_RNA +0000387 RprA_RNA +0000388 RRE_RNA +0000389 spot_42_RNA +0000390 telomerase_RNA +0000391 U1_snRNA +0000392 U2_snRNA +0000393 U4_snRNA +0000394 U4atac_snRNA +0000395 U5_snRNA +0000396 U6_snRNA +0000397 U6atac_snRNA +0000398 U11_snRNA +0000399 U12_snRNA +0000400 sequence_attribute +0000401 gene_attribute +0000402 enhancer_attribute +0000403 U14_snoRNA +0000404 vault_RNA +0000405 Y_RNA +0000406 twintron +0000407 cytosolic_18S_rRNA +0000408 site +0000409 binding_site +0000410 protein_binding_site +0000411 rescue_region +0000412 restriction_fragment +0000413 sequence_difference +0000414 invalidated_by_genomic_contamination +0000415 invalidated_by_genomic_polyA_primed_cDNA +0000416 invalidated_by_partial_processing +0000417 polypeptide_domain +0000418 signal_peptide +0000419 mature_protein_region +0000420 five_prime_terminal_inverted_repeat +0000421 three_prime_terminal_inverted_repeat +0000422 U5_LTR_region +0000423 R_LTR_region +0000424 U3_LTR_region +0000425 five_prime_LTR +0000426 three_prime_LTR +0000427 R_five_prime_LTR_region +0000428 U5_five_prime_LTR_region +0000429 U3_five_prime_LTR_region +0000430 R_three_prime_LTR_region +0000431 U3_three_prime_LTR_region +0000432 U5_three_prime_LTR_region +0000433 non_LTR_retrotransposon_polymeric_tract +0000434 target_site_duplication +0000435 RR_tract +0000436 
ARS +0000437 assortment_derived_duplication +0000438 gene_not_polyadenylated +0000439 inverted_ring_chromosome +0000440 vector_replicon +0000441 ss_oligo +0000442 ds_oligo +0000443 polymer_attribute +0000444 three_prime_noncoding_exon +0000445 five_prime_noncoding_exon +0000446 UTR_intron +0000447 five_prime_UTR_intron +0000448 three_prime_UTR_intron +0000449 random_sequence +0000450 interband +0000451 gene_with_polyadenylated_mRNA +0000452 transgene_attribute +0000453 chromosomal_transposition +0000454 rasiRNA +0000455 gene_with_mRNA_with_frameshift +0000456 recombinationally_rearranged_gene +0000457 interchromosomal_duplication +0000458 D_gene_segment +0000459 gene_with_trans_spliced_transcript +0000460 vertebrate_immunoglobulin_T_cell_receptor_segment +0000461 inversion_derived_bipartite_deficiency +0000462 pseudogenic_region +0000463 encodes_alternately_spliced_transcripts +0000464 decayed_exon +0000465 inversion_derived_deficiency_plus_duplication +0000466 V_gene_segment +0000467 post_translationally_regulated_by_protein_stability +0000468 golden_path_fragment +0000469 post_translationally_regulated_by_protein_modification +0000470 J_gene_segment +0000471 autoregulated +0000472 tiling_path +0000473 negatively_autoregulated +0000474 tiling_path_fragment +0000475 positively_autoregulated +0000476 contig_read +0000477 polycistronic_gene +0000478 C_gene_segment +0000479 trans_spliced_transcript +0000480 tiling_path_clone +0000481 terminal_inverted_repeat +0000482 vertebrate_immunoglobulin_T_cell_receptor_gene_cluster +0000483 nc_primary_transcript +0000484 three_prime_coding_exon_noncoding_region +0000485 DJ_J_cluster +0000486 five_prime_coding_exon_noncoding_region +0000487 VDJ_J_C_cluster +0000488 VDJ_J_cluster +0000489 VJ_C_cluster +0000490 VJ_J_C_cluster +0000491 VJ_J_cluster +0000492 D_gene_recombination_feature +0000493 three_prime_D_heptamer +0000494 three_prime_D_nonamer +0000495 three_prime_D_spacer +0000496 five_prime_D_heptamer +0000497 
five_prime_D_nonamer +0000498 five_prime_D_spacer +0000499 virtual_sequence +0000500 Hoogsteen_base_pair +0000501 reverse_Hoogsteen_base_pair +0000502 transcribed_region +0000503 alternately_spliced_gene_encodeing_one_transcript +0000504 D_DJ_C_cluster +0000505 D_DJ_cluster +0000506 D_DJ_J_C_cluster +0000507 pseudogenic_exon +0000508 D_DJ_J_cluster +0000509 D_J_C_cluster +0000510 VD_gene_segment +0000511 J_C_cluster +0000512 inversion_derived_deficiency_plus_aneuploid +0000513 J_cluster +0000514 J_nonamer +0000515 J_heptamer +0000516 pseudogenic_transcript +0000517 J_spacer +0000518 V_DJ_cluster +0000519 V_DJ_J_cluster +0000520 V_VDJ_C_cluster +0000521 V_VDJ_cluster +0000522 V_VDJ_J_cluster +0000523 V_VJ_C_cluster +0000524 V_VJ_cluster +0000525 V_VJ_J_cluster +0000526 V_cluster +0000527 V_D_DJ_C_cluster +0000528 V_D_DJ_cluster +0000529 V_D_DJ_J_C_cluster +0000530 V_D_DJ_J_cluster +0000531 V_D_J_C_cluster +0000532 V_D_J_cluster +0000533 V_heptamer +0000534 V_J_cluster +0000535 V_J_C_cluster +0000536 V_nonamer +0000537 V_spacer +0000538 V_gene_recombination_feature +0000539 DJ_C_cluster +0000540 DJ_J_C_cluster +0000541 VDJ_C_cluster +0000542 V_DJ_C_cluster +0000543 alternately_spliced_gene_encoding_greater_than_one_transcript +0000544 helitron +0000545 recoding_pseudoknot +0000546 designed_sequence +0000547 inversion_derived_bipartite_duplication +0000548 gene_with_edited_transcript +0000549 inversion_derived_duplication_plus_aneuploid +0000550 aneuploid_chromosome +0000551 polyA_signal_sequence +0000552 Shine_Dalgarno_sequence +0000553 polyA_site +0000554 assortment_derived_deficiency_plus_duplication +0000555 five_prime_clip +0000556 five_prime_D_recombination_signal_sequence +0000557 three_prime_clip +0000558 C_cluster +0000559 D_cluster +0000560 D_J_cluster +0000561 heptamer_of_recombination_feature_of_vertebrate_immune_system_gene +0000562 nonamer_of_recombination_feature_of_vertebrate_immune_system_gene +0000563 
vertebrate_immune_system_gene_recombination_spacer +0000564 V_DJ_J_C_cluster +0000565 V_VDJ_J_C_cluster +0000566 V_VJ_J_C_cluster +0000567 inversion_derived_aneuploid_chromosome +0000568 bidirectional_promoter +0000569 retrotransposed +0000570 three_prime_D_recombination_signal_sequence +0000571 miRNA_encoding +0000572 DJ_gene_segment +0000573 rRNA_encoding +0000574 VDJ_gene_segment +0000575 scRNA_encoding +0000576 VJ_gene_segment +0000577 centromere +0000578 snoRNA_encoding +0000579 edited_transcript_feature +0000580 methylation_guide_snoRNA_primary_transcript +0000581 cap +0000582 rRNA_cleavage_snoRNA_primary_transcript +0000583 pre_edited_region +0000584 tmRNA +0000585 C_D_box_snoRNA_encoding +0000586 tmRNA_primary_transcript +0000587 group_I_intron +0000588 autocatalytically_spliced_intron +0000589 SRP_RNA_primary_transcript +0000590 SRP_RNA +0000591 pseudoknot +0000592 H_pseudoknot +0000593 C_D_box_snoRNA +0000594 H_ACA_box_snoRNA +0000595 C_D_box_snoRNA_primary_transcript +0000596 H_ACA_box_snoRNA_primary_transcript +0000597 transcript_edited_by_U_insertion/deletion +0000598 edited_by_C_insertion_and_dinucleotide_insertion +0000599 edited_by_C_to_U_substitution +0000600 edited_by_A_to_I_substitution +0000601 edited_by_G_addition +0000602 guide_RNA +0000603 group_II_intron +0000604 editing_block +0000605 intergenic_region +0000606 editing_domain +0000607 unedited_region +0000608 H_ACA_box_snoRNA_encoding +0000609 oligo_U_tail +0000610 polyA_sequence +0000611 branch_site +0000612 polypyrimidine_tract +0000613 bacterial_RNApol_promoter +0000614 bacterial_terminator +0000615 terminator_of_type_2_RNApol_III_promoter +0000616 transcription_end_site +0000617 RNApol_III_promoter_type_1 +0000618 RNApol_III_promoter_type_2 +0000619 A_box +0000620 B_box +0000621 RNApol_III_promoter_type_3 +0000622 C_box +0000623 snRNA_encoding +0000624 telomere +0000625 silencer +0000626 chromosomal_regulatory_element +0000627 insulator +0000628 chromosomal_structural_element +0000629 
five_prime_open_reading_frame +0000630 upstream_AUG_codon +0000631 polycistronic_primary_transcript +0000632 monocistronic_primary_transcript +0000633 monocistronic_mRNA +0000634 polycistronic_mRNA +0000635 mini_exon_donor_RNA +0000636 spliced_leader_RNA +0000637 engineered_plasmid +0000638 transcribed_spacer_region +0000639 internal_transcribed_spacer_region +0000640 external_transcribed_spacer_region +0000641 tetranucleotide_repeat_microsatellite_feature +0000642 SRP_RNA_encoding +0000643 minisatellite +0000644 antisense_RNA +0000645 antisense_primary_transcript +0000646 siRNA +0000647 miRNA_primary_transcript +0000650 cytosolic_SSU_rRNA +0000651 cytosolic_LSU_rRNA +0000652 cytosolic_5S_rRNA +0000653 cytosolic_28S_rRNA +0000654 maxicircle_gene +0000655 ncRNA +0000656 stRNA_encoding +0000657 repeat_region +0000658 dispersed_repeat +0000659 tmRNA_encoding +0000660 DNA_invertase_target_sequence +0000661 intron_attribute +0000662 spliceosomal_intron +0000663 tRNA_encoding +0000664 introgressed_chromosome_region +0000665 monocistronic_transcript +0000666 mobile_intron +0000667 insertion +0000668 EST_match +0000669 sequence_rearrangement_feature +0000670 chromosome_breakage_sequence +0000671 internal_eliminated_sequence +0000672 macronucleus_destined_segment +0000673 transcript +0000674 non_canonical_splice_site +0000675 canonical_splice_site +0000676 canonical_three_prime_splice_site +0000677 canonical_five_prime_splice_site +0000678 non_canonical_three_prime_splice_site +0000679 non_canonical_five_prime_splice_site +0000680 non_canonical_start_codon +0000681 aberrant_processed_transcript +0000682 splicing_feature +0000683 exonic_splice_enhancer +0000684 nuclease_sensitive_site +0000685 DNaseI_hypersensitive_site +0000686 translocation_element +0000687 deletion_junction +0000688 golden_path +0000689 cDNA_match +0000690 gene_with_polycistronic_transcript +0000691 cleaved_initiator_methionine +0000692 gene_with_dicistronic_transcript +0000693 gene_with_recoded_mRNA 
+0000694 SNP +0000695 reagent +0000696 oligo +0000697 gene_with_stop_codon_read_through +0000698 gene_with_stop_codon_redefined_as_pyrrolysine +0000699 junction +0000700 remark +0000701 possible_base_call_error +0000702 possible_assembly_error +0000703 experimental_result_region +0000704 gene +0000705 tandem_repeat +0000706 trans_splice_acceptor_site +0000707 trans_splice_donor_site +0000708 SL1_acceptor_site +0000709 SL2_acceptor_site +0000710 gene_with_stop_codon_redefined_as_selenocysteine +0000711 gene_with_mRNA_recoded_by_translational_bypass +0000712 gene_with_transcript_with_translational_frameshift +0000713 DNA_motif +0000714 nucleotide_motif +0000715 RNA_motif +0000716 dicistronic_mRNA +0000717 reading_frame +0000718 blocked_reading_frame +0000719 ultracontig +0000720 foreign_transposable_element +0000721 gene_with_dicistronic_primary_transcript +0000722 gene_with_dicistronic_mRNA +0000723 iDNA +0000724 oriT +0000725 transit_peptide +0000726 repeat_unit +0000727 cis_regulatory_module +0000728 intein +0000729 intein_containing +0000730 gap +0000731 fragmentary +0000732 predicted +0000733 feature_attribute +0000734 exemplar_mRNA +0000735 sequence_location +0000736 organelle_sequence +0000737 mitochondrial_sequence +0000738 nuclear_sequence +0000739 nucleomorphic_sequence +0000740 plastid_sequence +0000741 kinetoplast +0000742 maxicircle +0000743 apicoplast_sequence +0000744 chromoplast_sequence +0000745 chloroplast_sequence +0000746 cyanelle_sequence +0000747 leucoplast_sequence +0000748 proplastid_sequence +0000749 plasmid_location +0000750 amplification_origin +0000751 proviral_location +0000752 gene_group_regulatory_region +0000753 clone_insert +0000754 lambda_vector +0000755 plasmid_vector +0000756 cDNA +0000757 single_stranded_cDNA +0000758 double_stranded_cDNA +0000759 plasmid_clone +0000760 YAC_clone +0000761 phagemid_clone +0000762 PAC_clone +0000763 fosmid_clone +0000764 BAC_clone +0000765 cosmid_clone +0000766 pyrrolysyl_tRNA +0000767 
clone_insert_start +0000768 episome +0000769 tmRNA_coding_piece +0000770 tmRNA_acceptor_piece +0000771 QTL +0000772 genomic_island +0000773 pathogenic_island +0000774 metabolic_island +0000775 adaptive_island +0000776 symbiosis_island +0000777 pseudogenic_rRNA +0000778 pseudogenic_tRNA +0000779 engineered_episome +0000780 transposable_element_attribute +0000781 transgenic +0000782 natural +0000783 engineered +0000784 foreign +0000785 cloned_region +0000786 reagent_attribute +0000787 clone_attribute +0000788 cloned +0000789 validated +0000790 invalidated +0000791 cloned_genomic +0000792 cloned_cDNA +0000793 engineered_DNA +0000794 engineered_rescue_region +0000795 rescue_mini_gene +0000796 transgenic_transposable_element +0000797 natural_transposable_element +0000798 engineered_transposable_element +0000799 engineered_foreign_transposable_element +0000800 assortment_derived_duplication +0000801 assortment_derived_deficiency_plus_duplication +0000802 assortment_derived_deficiency +0000803 assortment_derived_aneuploid +0000804 engineered_region +0000805 engineered_foreign_region +0000806 fusion +0000807 engineered_tag +0000808 validated_cDNA_clone +0000809 invalidated_cDNA_clone +0000810 chimeric_cDNA_clone +0000811 genomically_contaminated_cDNA_clone +0000812 polyA_primed_cDNA_clone +0000813 partially_processed_cDNA_clone +0000814 rescue +0000815 mini_gene +0000816 rescue_gene +0000817 wild_type +0000818 wild_type_rescue_gene +0000819 mitochondrial_chromosome +0000820 chloroplast_chromosome +0000821 chromoplast_chromosome +0000822 cyanelle_chromosome +0000823 leucoplast_chromosome +0000824 macronuclear_chromosome +0000825 micronuclear_chromosome +0000828 nuclear_chromosome +0000829 nucleomorphic_chromosome +0000830 chromosome_part +0000831 gene_member_region +0000832 promoter_region +0000833 transcript_region +0000834 mature_transcript_region +0000835 primary_transcript_region +0000836 mRNA_region +0000837 UTR_region +0000838 rRNA_primary_transcript_region +0000839 
polypeptide_region +0000840 repeat_component +0000841 spliceosomal_intron_region +0000842 gene_component_region +0000843 bacterial_RNApol_promoter_region +0000844 RNApol_II_promoter_region +0000845 RNApol_III_promoter_type_1_region +0000846 RNApol_III_promoter_type_2_region +0000847 tmRNA_region +0000848 LTR_component +0000849 three_prime_LTR_component +0000850 five_prime_LTR_component +0000851 CDS_region +0000852 exon_region +0000853 homologous_region +0000854 paralogous_region +0000855 orthologous_region +0000856 conserved +0000857 homologous +0000858 orthologous +0000859 paralogous +0000860 syntenic +0000861 capped_primary_transcript +0000862 capped_mRNA +0000863 mRNA_attribute +0000864 exemplar +0000865 frameshift +0000866 minus_1_frameshift +0000867 minus_2_frameshift +0000868 plus_1_frameshift +0000869 plus_2_framshift +0000870 trans_spliced +0000871 polyadenylated_mRNA +0000872 trans_spliced_mRNA +0000873 edited_transcript +0000874 edited_transcript_by_A_to_I_substitution +0000875 bound_by_protein +0000876 bound_by_nucleic_acid +0000877 alternatively_spliced +0000878 monocistronic +0000879 dicistronic +0000880 polycistronic +0000881 recoded +0000882 codon_redefined +0000883 stop_codon_read_through +0000884 stop_codon_redefined_as_pyrrolysine +0000885 stop_codon_redefined_as_selenocysteine +0000886 recoded_by_translational_bypass +0000887 translationally_frameshifted +0000888 maternally_imprinted_gene +0000889 paternally_imprinted_gene +0000890 post_translationally_regulated_gene +0000891 negatively_autoregulated_gene +0000892 positively_autoregulated_gene +0000893 silenced +0000894 silenced_by_DNA_modification +0000895 silenced_by_DNA_methylation +0000896 translationally_regulated_gene +0000897 allelically_excluded_gene +0000898 epigenetically_modified_gene +0000899 nuclear_mitochondrial +0000900 processed +0000901 unequally_crossed_over +0000902 transgene +0000903 endogenous_retroviral_sequence +0000904 rearranged_at_DNA_level +0000905 status +0000906 
independently_known +0000907 supported_by_sequence_similarity +0000908 supported_by_domain_match +0000909 supported_by_EST_or_cDNA +0000910 orphan +0000911 predicted_by_ab_initio_computation +0000912 asx_turn +0000913 cloned_cDNA_insert +0000914 cloned_genomic_insert +0000915 engineered_insert +0000916 edit_operation +0000917 insert_U +0000918 delete_U +0000919 substitute_A_to_I +0000920 insert_C +0000921 insert_dinucleotide +0000922 substitute_C_to_U +0000923 insert_G +0000924 insert_GC +0000925 insert_GU +0000926 insert_CU +0000927 insert_AU +0000928 insert_AA +0000929 edited_mRNA +0000930 guide_RNA_region +0000931 anchor_region +0000932 pre_edited_mRNA +0000933 intermediate +0000934 miRNA_target_site +0000935 edited_CDS +0000936 vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment +0000937 vertebrate_immune_system_feature +0000938 vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster +0000939 vertebrate_immune_system_gene_recombination_signal_feature +0000940 recombinationally_rearranged +0000941 recombinationally_rearranged_vertebrate_immune_system_gene +0000942 attP_site +0000943 attB_site +0000944 attL_site +0000945 attR_site +0000946 integration_excision_site +0000947 resolution_site +0000948 inversion_site +0000949 dif_site +0000950 attC_site +0000951 eukaryotic_terminator +0000952 oriV +0000953 oriC +0000954 DNA_chromosome +0000955 double_stranded_DNA_chromosome +0000956 single_stranded_DNA_chromosome +0000957 linear_double_stranded_DNA_chromosome +0000958 circular_double_stranded_DNA_chromosome +0000959 linear_single_stranded_DNA_chromosome +0000960 circular_single_stranded_DNA_chromosome +0000961 RNA_chromosome +0000962 single_stranded_RNA_chromosome +0000963 linear_single_stranded_RNA_chromosome +0000964 linear_double_stranded_RNA_chromosome +0000965 double_stranded_RNA_chromosome +0000966 circular_single_stranded_RNA_chromosome +0000967 circular_double_stranded_RNA_chromosome +0000968 sequence_replication_mode +0000969 
rolling_circle +0000970 theta_replication +0000971 DNA_replication_mode +0000972 RNA_replication_mode +0000973 insertion_sequence +0000975 minicircle_gene +0000976 cryptic +0000977 anchor_binding_site +0000978 template_region +0000979 gRNA_encoding +0000980 minicircle +0000981 rho_dependent_bacterial_terminator +0000982 rho_independent_bacterial_terminator +0000983 strand_attribute +0000984 single +0000985 double +0000986 topology_attribute +0000987 linear +0000988 circular +0000989 class_II_RNA +0000990 class_I_RNA +0000991 genomic_DNA +0000992 BAC_cloned_genomic_insert +0000993 consensus +0000994 consensus_region +0000995 consensus_mRNA +0000996 predicted_gene +0000997 gene_fragment +0000998 recursive_splice_site +0000999 BAC_end +0001000 cytosolic_16S_rRNA +0001001 cytosolic_23S_rRNA +0001002 cytosolic_25S_rRNA +0001003 solo_LTR +0001004 low_complexity +0001005 low_complexity_region +0001006 prophage +0001007 cryptic_prophage +0001008 tetraloop +0001009 DNA_constraint_sequence +0001010 i_motif +0001011 PNA_oligo +0001012 DNAzyme +0001013 MNP +0001014 intron_domain +0001015 wobble_base_pair +0001016 internal_guide_sequence +0001017 silent_mutation +0001018 epitope +0001019 copy_number_variation +0001020 sequence_variant_affecting_copy_number +0001021 chromosome_breakpoint +0001022 inversion_breakpoint +0001023 allele +0001024 haplotype +0001025 polymorphic_sequence_variant +0001026 genome +0001027 genotype +0001028 diplotype +0001029 direction_attribute +0001030 forward +0001031 reverse +0001032 mitochondrial_DNA +0001033 chloroplast_DNA +0001034 miRtron +0001035 piRNA +0001036 arginyl_tRNA +0001037 mobile_genetic_element +0001038 extrachromosomal_mobile_genetic_element +0001039 integrated_mobile_genetic_element +0001040 integrated_plasmid +0001041 viral_sequence +0001042 phage_sequence +0001043 attCtn_site +0001044 nuclear_mt_pseudogene +0001045 cointegrated_plasmid +0001046 IRLinv_site +0001047 IRRinv_site +0001048 inversion_site_part +0001049 
defective_conjugative_transposon +0001050 repeat_fragment +0001051 nested_region +0001052 nested_repeat +0001053 nested_transposon +0001054 transposon_fragment +0001055 transcriptional_cis_regulatory_region +0001056 splicing_regulatory_region +0001057 enhanceosome +0001058 promoter_targeting_sequence +0001059 sequence_alteration +0001060 sequence_variant +0001061 propeptide_cleavage_site +0001062 propeptide +0001063 immature_peptide_region +0001064 active_peptide +0001066 compositionally_biased_region_of_peptide +0001067 polypeptide_motif +0001068 polypeptide_repeat +0001070 polypeptide_structural_region +0001071 membrane_structure +0001072 extramembrane_polypeptide_region +0001073 cytoplasmic_polypeptide_region +0001074 non_cytoplasmic_polypeptide_region +0001075 intramembrane_polypeptide_region +0001076 membrane_peptide_loop +0001077 transmembrane_polypeptide_region +0001078 polypeptide_secondary_structure +0001079 polypeptide_structural_motif +0001080 coiled_coil +0001081 helix_turn_helix +0001082 polypeptide_sequencing_information +0001083 non_adjacent_residues +0001084 non_terminal_residue +0001085 sequence_conflict +0001086 sequence_uncertainty +0001087 cross_link +0001088 disulfide_bond +0001089 post_translationally_modified_region +0001090 covalent_binding_site +0001091 non_covalent_binding_site +0001092 polypeptide_metal_contact +0001093 protein_protein_contact +0001094 polypeptide_calcium_ion_contact_site +0001095 polypeptide_cobalt_ion_contact_site +0001096 polypeptide_copper_ion_contact_site +0001097 polypeptide_iron_ion_contact_site +0001098 polypeptide_magnesium_ion_contact_site +0001099 polypeptide_manganese_ion_contact_site +0001100 polypeptide_molybdenum_ion_contact_site +0001101 polypeptide_nickel_ion_contact_site +0001102 polypeptide_tungsten_ion_contact_site +0001103 polypeptide_zinc_ion_contact_site +0001104 catalytic_residue +0001105 polypeptide_ligand_contact +0001106 asx_motif +0001107 beta_bulge +0001108 beta_bulge_loop +0001109 
beta_bulge_loop_five +0001110 beta_bulge_loop_six +0001111 beta_strand +0001112 antiparallel_beta_strand +0001113 parallel_beta_strand +0001114 peptide_helix +0001115 left_handed_peptide_helix +0001116 right_handed_peptide_helix +0001117 alpha_helix +0001118 pi_helix +0001119 three_ten_helix +0001120 polypeptide_nest_motif +0001121 polypeptide_nest_left_right_motif +0001122 polypeptide_nest_right_left_motif +0001123 schellmann_loop +0001124 schellmann_loop_seven +0001125 schellmann_loop_six +0001126 serine_threonine_motif +0001127 serine_threonine_staple_motif +0001128 polypeptide_turn_motif +0001129 asx_turn_left_handed_type_one +0001130 asx_turn_left_handed_type_two +0001131 asx_turn_right_handed_type_two +0001132 asx_turn_right_handed_type_one +0001133 beta_turn +0001134 beta_turn_left_handed_type_one +0001135 beta_turn_left_handed_type_two +0001136 beta_turn_right_handed_type_one +0001137 beta_turn_right_handed_type_two +0001138 gamma_turn +0001139 gamma_turn_classic +0001140 gamma_turn_inverse +0001141 serine_threonine_turn +0001142 st_turn_left_handed_type_one +0001143 st_turn_left_handed_type_two +0001144 st_turn_right_handed_type_one +0001145 st_turn_right_handed_type_two +0001146 polypeptide_variation_site +0001147 natural_variant_site +0001148 mutated_variant_site +0001149 alternate_sequence_site +0001150 beta_turn_type_six +0001151 beta_turn_type_six_a +0001152 beta_turn_type_six_a_one +0001153 beta_turn_type_six_a_two +0001154 beta_turn_type_six_b +0001155 beta_turn_type_eight +0001156 DRE_motif +0001157 DMv4_motif +0001158 E_box_motif +0001159 DMv5_motif +0001160 DMv3_motif +0001161 DMv2_motif +0001162 MTE +0001163 INR1_motif +0001164 DPE1_motif +0001165 DMv1_motif +0001166 GAGA_motif +0001167 NDM2_motif +0001168 NDM3_motif +0001169 ds_RNA_viral_sequence +0001170 polinton +0001171 rRNA_21S +0001172 tRNA_region +0001173 anticodon_loop +0001174 anticodon +0001175 CCA_tail +0001176 DHU_loop +0001177 T_loop +0001178 pyrrolysine_tRNA_primary_transcript 
+0001179 U3_snoRNA +0001180 AU_rich_element +0001181 Bruno_response_element +0001182 iron_responsive_element +0001183 morpholino_backbone +0001184 PNA +0001185 enzymatic +0001186 ribozymic +0001187 pseudouridylation_guide_snoRNA +0001188 LNA +0001189 LNA_oligo +0001190 TNA +0001191 TNA_oligo +0001192 GNA +0001193 GNA_oligo +0001194 R_GNA +0001195 R_GNA_oligo +0001196 S_GNA +0001197 S_GNA_oligo +0001198 ds_DNA_viral_sequence +0001199 ss_RNA_viral_sequence +0001200 negative_sense_ssRNA_viral_sequence +0001201 positive_sense_ssRNA_viral_sequence +0001202 ambisense_ssRNA_viral_sequence +0001203 RNA_polymerase_promoter +0001204 Phage_RNA_Polymerase_Promoter +0001205 SP6_RNA_Polymerase_Promoter +0001206 T3_RNA_Polymerase_Promoter +0001207 T7_RNA_Polymerase_Promoter +0001208 five_prime_EST +0001209 three_prime_EST +0001210 translational_frameshift +0001211 plus_1_translational_frameshift +0001212 plus_2_translational_frameshift +0001213 group_III_intron +0001214 noncoding_region_of_exon +0001215 coding_region_of_exon +0001216 endonuclease_spliced_intron +0001217 protein_coding_gene +0001218 transgenic_insertion +0001219 retrogene +0001220 silenced_by_RNA_interference +0001221 silenced_by_histone_modification +0001222 silenced_by_histone_methylation +0001223 silenced_by_histone_deacetylation +0001224 gene_silenced_by_RNA_interference +0001225 gene_silenced_by_histone_modification +0001226 gene_silenced_by_histone_methylation +0001227 gene_silenced_by_histone_deacetylation +0001228 dihydrouridine +0001229 pseudouridine +0001230 inosine +0001231 seven_methylguanine +0001232 ribothymidine +0001233 methylinosine +0001234 mobile +0001235 replicon +0001236 base +0001237 amino_acid +0001238 major_TSS +0001239 minor_TSS +0001240 TSS_region +0001241 encodes_alternate_transcription_start_sites +0001243 miRNA_primary_transcript_region +0001244 pre_miRNA +0001245 miRNA_stem +0001246 miRNA_loop +0001247 synthetic_oligo +0001248 assembly +0001249 fragment_assembly +0001250 
fingerprint_map +0001251 STS_map +0001252 RH_map +0001253 sonicate_fragment +0001254 polyploid +0001255 autopolyploid +0001256 allopolyploid +0001257 homing_endonuclease_binding_site +0001258 octamer_motif +0001259 apicoplast_chromosome +0001260 sequence_collection +0001261 overlapping_feature_set +0001262 overlapping_EST_set +0001263 ncRNA_gene +0001264 gRNA_gene +0001265 miRNA_gene +0001266 scRNA_gene +0001267 snoRNA_gene +0001268 snRNA_gene +0001269 SRP_RNA_gene +0001271 tmRNA_gene +0001272 tRNA_gene +0001273 modified_adenosine +0001274 modified_inosine +0001275 modified_cytidine +0001276 modified_guanosine +0001277 modified_uridine +0001278 one_methylinosine +0001279 one_two_prime_O_dimethylinosine +0001280 two_prime_O_methylinosine +0001281 three_methylcytidine +0001282 five_methylcytidine +0001283 two_prime_O_methylcytidine +0001284 two_thiocytidine +0001285 N4_acetylcytidine +0001286 five_formylcytidine +0001287 five_two_prime_O_dimethylcytidine +0001288 N4_acetyl_2_prime_O_methylcytidine +0001289 lysidine +0001290 N4_methylcytidine +0001291 N4_2_prime_O_dimethylcytidine +0001292 five_hydroxymethylcytidine +0001293 five_formyl_two_prime_O_methylcytidine +0001294 N4_N4_2_prime_O_trimethylcytidine +0001295 one_methyladenosine +0001296 two_methyladenosine +0001297 N6_methyladenosine +0001298 two_prime_O_methyladenosine +0001299 two_methylthio_N6_methyladenosine +0001300 N6_isopentenyladenosine +0001301 two_methylthio_N6_isopentenyladenosine +0001302 N6_cis_hydroxyisopentenyl_adenosine +0001303 two_methylthio_N6_cis_hydroxyisopentenyl_adenosine +0001304 N6_glycinylcarbamoyladenosine +0001305 N6_threonylcarbamoyladenosine +0001306 two_methylthio_N6_threonyl_carbamoyladenosine +0001307 N6_methyl_N6_threonylcarbamoyladenosine +0001308 N6_hydroxynorvalylcarbamoyladenosine +0001309 two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine +0001310 two_prime_O_ribosyladenosine_phosphate +0001311 N6_N6_dimethyladenosine +0001312 N6_2_prime_O_dimethyladenosine +0001313 
N6_N6_2_prime_O_trimethyladenosine +0001314 one_two_prime_O_dimethyladenosine +0001315 N6_acetyladenosine +0001316 seven_deazaguanosine +0001317 queuosine +0001318 epoxyqueuosine +0001319 galactosyl_queuosine +0001320 mannosyl_queuosine +0001321 seven_cyano_seven_deazaguanosine +0001322 seven_aminomethyl_seven_deazaguanosine +0001323 archaeosine +0001324 one_methylguanosine +0001325 N2_methylguanosine +0001326 seven_methylguanosine +0001327 two_prime_O_methylguanosine +0001328 N2_N2_dimethylguanosine +0001329 N2_2_prime_O_dimethylguanosine +0001330 N2_N2_2_prime_O_trimethylguanosine +0001331 two_prime_O_ribosylguanosine_phosphate +0001332 wybutosine +0001333 peroxywybutosine +0001334 hydroxywybutosine +0001335 undermodified_hydroxywybutosine +0001336 wyosine +0001337 methylwyosine +0001338 N2_7_dimethylguanosine +0001339 N2_N2_7_trimethylguanosine +0001340 one_two_prime_O_dimethylguanosine +0001341 four_demethylwyosine +0001342 isowyosine +0001343 N2_7_2prirme_O_trimethylguanosine +0001344 five_methyluridine +0001345 two_prime_O_methyluridine +0001346 five_two_prime_O_dimethyluridine +0001347 one_methylpseudouridine +0001348 two_prime_O_methylpseudouridine +0001349 two_thiouridine +0001350 four_thiouridine +0001351 five_methyl_2_thiouridine +0001352 two_thio_two_prime_O_methyluridine +0001353 three_three_amino_three_carboxypropyl_uridine +0001354 five_hydroxyuridine +0001355 five_methoxyuridine +0001356 uridine_five_oxyacetic_acid +0001357 uridine_five_oxyacetic_acid_methyl_ester +0001358 five_carboxyhydroxymethyl_uridine +0001359 five_carboxyhydroxymethyl_uridine_methyl_ester +0001360 five_methoxycarbonylmethyluridine +0001361 five_methoxycarbonylmethyl_two_prime_O_methyluridine +0001362 five_methoxycarbonylmethyl_two_thiouridine +0001363 five_aminomethyl_two_thiouridine +0001364 five_methylaminomethyluridine +0001365 five_methylaminomethyl_two_thiouridine +0001366 five_methylaminomethyl_two_selenouridine +0001367 five_carbamoylmethyluridine +0001368 
five_carbamoylmethyl_two_prime_O_methyluridine +0001369 five_carboxymethylaminomethyluridine +0001370 five_carboxymethylaminomethyl_two_prime_O_methyluridine +0001371 five_carboxymethylaminomethyl_two_thiouridine +0001372 three_methyluridine +0001373 one_methyl_three_three_amino_three_carboxypropyl_pseudouridine +0001374 five_carboxymethyluridine +0001375 three_two_prime_O_dimethyluridine +0001376 five_methyldihydrouridine +0001377 three_methylpseudouridine +0001378 five_taurinomethyluridine +0001379 five_taurinomethyl_two_thiouridine +0001380 five_isopentenylaminomethyl_uridine +0001381 five_isopentenylaminomethyl_two_thiouridine +0001382 five_isopentenylaminomethyl_two_prime_O_methyluridine +0001383 histone_binding_site +0001384 CDS_fragment +0001385 modified_amino_acid_feature +0001386 modified_glycine +0001387 modified_L_alanine +0001388 modified_L_asparagine +0001389 modified_L_aspartic_acid +0001390 modified_L_cysteine +0001391 modified_L_glutamic_acid +0001392 modified_L_threonine +0001393 modified_L_tryptophan +0001394 modified_L_glutamine +0001395 modified_L_methionine +0001396 modified_L_isoleucine +0001397 modified_L_phenylalanine +0001398 modified_L_histidine +0001399 modified_L_serine +0001400 modified_L_lysine +0001401 modified_L_leucine +0001402 modified_L_selenocysteine +0001403 modified_L_valine +0001404 modified_L_proline +0001405 modified_L_tyrosine +0001406 modified_L_arginine +0001407 peptidyl +0001408 cleaved_for_gpi_anchor_region +0001409 biomaterial_region +0001410 experimental_feature +0001411 biological_region +0001412 topologically_defined_region +0001413 translocation_breakpoint +0001414 insertion_breakpoint +0001415 deletion_breakpoint +0001416 five_prime_flanking_region +0001417 three_prime_flanking_region +0001418 transcribed_fragment +0001419 cis_splice_site +0001420 trans_splice_site +0001421 splice_junction +0001422 conformational_switch +0001423 dye_terminator_read +0001424 pyrosequenced_read +0001425 ligation_based_read +0001426 
polymerase_synthesis_read +0001427 cis_regulatory_frameshift_element +0001428 expressed_sequence_assembly +0001429 DNA_binding_site +0001431 cryptic_gene +0001432 sequence_variant_affecting_polyadenylation +0001433 three_prime_RACE_clone +0001434 cassette_pseudogene +0001435 alanine +0001436 valine +0001437 leucine +0001438 isoleucine +0001439 proline +0001440 tryptophan +0001441 phenylalanine +0001442 methionine +0001443 glycine +0001444 serine +0001445 threonine +0001446 tyrosine +0001447 cysteine +0001448 glutamine +0001449 asparagine +0001450 lysine +0001451 arginine +0001452 histidine +0001453 aspartic_acid +0001454 glutamic_acid +0001455 selenocysteine +0001456 pyrrolysine +0001457 transcribed_cluster +0001458 unigene_cluster +0001459 CRISPR +0001460 insulator_binding_site +0001461 enhancer_binding_site +0001462 contig_collection +0001463 lincRNA +0001464 UST +0001465 three_prime_UST +0001466 five_prime_UST +0001467 RST +0001468 three_prime_RST +0001469 five_prime_RST +0001470 UST_match +0001471 RST_match +0001472 primer_match +0001473 miRNA_antiguide +0001474 trans_splice_junction +0001475 outron +0001476 natural_plasmid +0001477 gene_trap_construct +0001478 promoter_trap_construct +0001479 enhancer_trap_construct +0001480 PAC_end +0001481 RAPD +0001482 shadow_enhancer +0001483 SNV +0001484 X_element_combinatorial_repeat +0001485 Y_prime_element +0001486 standard_draft +0001487 high_quality_draft +0001488 improved_high_quality_draft +0001489 annotation_directed_improved_draft +0001490 noncontiguous_finished +0001491 finished_genome +0001492 intronic_regulatory_region +0001493 centromere_DNA_Element_I +0001494 centromere_DNA_Element_II +0001495 centromere_DNA_Element_III +0001496 telomeric_repeat +0001497 X_element +0001498 YAC_end +0001499 whole_genome_sequence_status +0001500 heritable_phenotypic_marker +0001501 peptide_collection +0001502 high_identity_region +0001503 processed_transcript +0001504 assortment_derived_variation +0001505 reference_genome 
+0001506 variant_genome +0001507 variant_collection +0001508 alteration_attribute +0001509 chromosomal_variation_attribute +0001510 intrachromosomal +0001511 interchromosomal +0001512 insertion_attribute +0001513 tandem +0001514 direct +0001515 inverted +0001516 free +0001517 inversion_attribute +0001518 pericentric +0001519 paracentric +0001520 translocaton_attribute +0001521 reciprocal +0001522 insertional +0001523 duplication_attribute +0001524 chromosomally_aberrant_genome +0001525 assembly_error_correction +0001526 base_call_error_correction +0001527 peptide_localization_signal +0001528 nuclear_localization_signal +0001529 endosomal_localization_signal +0001530 lysosomal_localization_signal +0001531 nuclear_export_signal +0001532 recombination_signal_sequence +0001533 cryptic_splice_site +0001534 nuclear_rim_localization_signal +0001535 P_TIR_transposon +0001536 functional_effect_variant +0001537 structural_variant +0001538 transcript_function_variant +0001539 translational_product_function_variant +0001540 level_of_transcript_variant +0001541 decreased_transcript_level_variant +0001542 increased_transcript_level_variant +0001543 transcript_processing_variant +0001544 editing_variant +0001545 polyadenylation_variant +0001546 transcript_stability_variant +0001547 decreased_transcript_stability_variant +0001548 increased_transcript_stability_variant +0001549 transcription_variant +0001550 rate_of_transcription_variant +0001551 increased_transcription_rate_variant +0001552 decreased_transcription_rate_variant +0001553 translational_product_level_variant +0001554 polypeptide_function_variant +0001555 decreased_translational_product_level +0001556 increased_translational_product_level +0001557 polypeptide_gain_of_function_variant +0001558 polypeptide_localization_variant +0001559 polypeptide_loss_of_function_variant +0001560 inactive_ligand_binding_site +0001561 polypeptide_partial_loss_of_function +0001562 polypeptide_post_translational_processing_variant +0001563 
copy_number_change +0001564 gene_variant +0001565 gene_fusion +0001566 regulatory_region_variant +0001567 stop_retained_variant +0001568 splicing_variant +0001569 cryptic_splice_site_variant +0001570 cryptic_splice_acceptor +0001571 cryptic_splice_donor +0001572 exon_loss_variant +0001573 intron_gain_variant +0001574 splice_acceptor_variant +0001575 splice_donor_variant +0001576 transcript_variant +0001577 complex_transcript_variant +0001578 stop_lost +0001579 transcript_sequence_variant +0001580 coding_sequence_variant +0001582 initiator_codon_variant +0001583 missense_variant +0001585 conservative_missense_variant +0001586 non_conservative_missense_variant +0001587 stop_gained +0001589 frameshift_variant +0001590 terminator_codon_variant +0001591 frame_restoring_variant +0001592 minus_1_frameshift_variant +0001593 minus_2_frameshift_variant +0001594 plus_1_frameshift_variant +0001595 plus_2_frameshift_variant +0001596 transcript_secondary_structure_variant +0001597 compensatory_transcript_secondary_structure_variant +0001598 translational_product_structure_variant +0001599 3D_polypeptide_structure_variant +0001600 complex_3D_structural_variant +0001601 conformational_change_variant +0001602 complex_change_of_translational_product_variant +0001603 polypeptide_sequence_variant +0001604 amino_acid_deletion +0001605 amino_acid_insertion +0001606 amino_acid_substitution +0001607 conservative_amino_acid_substitution +0001608 non_conservative_amino_acid_substitution +0001609 elongated_polypeptide +0001610 elongated_polypeptide_C_terminal +0001611 elongated_polypeptide_N_terminal +0001612 elongated_in_frame_polypeptide_C_terminal +0001613 elongated_out_of_frame_polypeptide_C_terminal +0001614 elongated_in_frame_polypeptide_N_terminal_elongation +0001615 elongated_out_of_frame_polypeptide_N_terminal +0001616 polypeptide_fusion +0001617 polypeptide_truncation +0001618 inactive_catalytic_site +0001619 non_coding_transcript_variant +0001620 mature_miRNA_variant +0001621 
NMD_transcript_variant +0001622 UTR_variant +0001623 5_prime_UTR_variant +0001624 3_prime_UTR_variant +0001626 incomplete_terminal_codon_variant +0001627 intron_variant +0001628 intergenic_variant +0001629 splice_site_variant +0001630 splice_region_variant +0001631 upstream_gene_variant +0001632 downstream_gene_variant +0001633 5KB_downstream_variant +0001634 500B_downstream_variant +0001635 5KB_upstream_variant +0001636 2KB_upstream_variant +0001637 rRNA_gene +0001638 piRNA_gene +0001639 RNase_P_RNA_gene +0001640 RNase_MRP_RNA_gene +0001641 lincRNA_gene +0001642 mathematically_defined_repeat +0001643 telomerase_RNA_gene +0001644 targeting_vector +0001645 genetic_marker +0001646 DArT_marker +0001647 kozak_sequence +0001648 nested_transposon +0001649 nested_repeat +0001650 inframe_variant +0001653 retinoic_acid_responsive_element +0001654 nucleotide_to_protein_binding_site +0001655 nucleotide_binding_site +0001656 metal_binding_site +0001657 ligand_binding_site +0001658 nested_tandem_repeat +0001659 promoter_element +0001660 core_eukaryotic_promoter_element +0001661 RNA_polymerase_II_TATA_box +0001662 RNA_polymerase_III_TATA_box +0001663 BREd_motif +0001664 DCE +0001665 DCE_SI +0001666 DCE_SII +0001667 DCE_SIII +0001668 proximal_promoter_element +0001669 RNApol_II_core_promoter +0001670 distal_promoter_element +0001671 bacterial_RNApol_promoter_sigma_70_element +0001672 bacterial_RNApol_promoter_sigma54_element +0001673 minus_12_signal +0001674 minus_24_signal +0001675 A_box_type_1 +0001676 A_box_type_2 +0001677 intermediate_element +0001678 regulatory_promoter_element +0001679 transcription_regulatory_region +0001680 translation_regulatory_region +0001681 recombination_regulatory_region +0001682 replication_regulatory_region +0001683 sequence_motif +0001684 experimental_feature_attribute +0001685 score +0001686 quality_value +0001687 restriction_enzyme_recognition_site +0001688 restriction_enzyme_cleavage_junction +0001689 five_prime_restriction_enzyme_junction 
+0001690 three_prime_restriction_enzyme_junction +0001691 blunt_end_restriction_enzyme_cleavage_site +0001692 sticky_end_restriction_enzyme_cleavage_site +0001693 blunt_end_restriction_enzyme_cleavage_junction +0001694 single_strand_restriction_enzyme_cleavage_site +0001695 restriction_enzyme_single_strand_overhang +0001696 experimentally_defined_binding_region +0001697 ChIP_seq_region +0001698 ASPE_primer +0001699 dCAPS_primer +0001700 histone_modification +0001701 histone_methylation_site +0001702 histone_acetylation_site +0001703 H3K9_acetylation_site +0001704 H3K14_acetylation_site +0001705 H3K4_monomethylation_site +0001706 H3K4_trimethylation +0001707 H3K9_trimethylation_site +0001708 H3K27_monomethylation_site +0001709 H3K27_trimethylation_site +0001710 H3K79_monomethylation_site +0001711 H3K79_dimethylation_site +0001712 H3K79_trimethylation_site +0001713 H4K20_monomethylation_site +0001714 H2BK5_monomethylation_site +0001715 ISRE +0001716 histone_ubiqitination_site +0001717 H2B_ubiquitination_site +0001718 H3K18_acetylation_site +0001719 H3K23_acetylation_site +0001720 epigenetically_modified_region +0001721 H3K27_acylation_site +0001722 H3K36_monomethylation_site +0001723 H3K36_dimethylation_site +0001724 H3K36_trimethylation_site +0001725 H3K4_dimethylation_site +0001726 H3K27_dimethylation_site +0001727 H3K9_monomethylation_site +0001728 H3K9_dimethylation_site +0001729 H4K16_acetylation_site +0001730 H4K5_acetylation_site +0001731 H4K8_acetylation_site +0001732 H3K27_methylation_site +0001733 H3K36_methylation_site +0001734 H3K4_methylation_site +0001735 H3K79_methylation_site +0001736 H3K9_methylation_site +0001737 histone_acylation_region +0001738 H4K_acylation_region +0001739 gene_with_non_canonical_start_codon +0001740 gene_with_start_codon_CUG +0001741 pseudogenic_gene_segment +0001742 copy_number_gain +0001743 copy_number_loss +0001744 UPD +0001745 maternal_uniparental_disomy +0001746 paternal_uniparental_disomy +0001747 open_chromatin_region 
+0001748 SL3_acceptor_site +0001749 SL4_acceptor_site +0001750 SL5_acceptor_site +0001751 SL6_acceptor_site +0001752 SL7_acceptor_site +0001753 SL8_acceptor_site +0001754 SL9_acceptor_site +0001755 SL10_acceptor_site +0001756 SL11_acceptor_site +0001757 SL12_acceptor_site +0001758 duplicated_pseudogene +0001759 unitary_pseudogene +0001760 non_processed_pseudogene +0001761 variant_quality +0001762 variant_origin +0001763 variant_frequency +0001764 unique_variant +0001765 rare_variant +0001766 polymorphic_variant +0001767 common_variant +0001768 fixed_variant +0001769 variant_phenotype +0001770 benign_variant +0001771 disease_associated_variant +0001772 disease_causing_variant +0001773 lethal_variant +0001774 quantitative_variant +0001775 maternal_variant +0001776 paternal_variant +0001777 somatic_variant +0001778 germline_variant +0001779 pedigree_specific_variant +0001780 population_specific_variant +0001781 de_novo_variant +0001782 TF_binding_site_variant +0001784 complex_structural_alteration +0001785 structural_alteration +0001786 loss_of_heterozygosity +0001787 splice_donor_5th_base_variant +0001788 U_box +0001789 mating_type_region +0001790 paired_end_fragment +0001791 exon_variant +0001792 non_coding_transcript_exon_variant +0001793 clone_end +0001794 point_centromere +0001795 regional_centromere +0001796 regional_centromere_central_core +0001797 centromeric_repeat +0001798 regional_centromere_inner_repeat_region +0001799 regional_centromere_outer_repeat_region +0001800 tasiRNA +0001801 tasiRNA_primary_transcript +0001802 increased_polyadenylation_variant +0001803 decreased_polyadenylation_variant +0001804 DDB_box +0001805 destruction_box +0001806 ER_retention_signal +0001807 KEN_box +0001808 mitochondrial_targeting_signal +0001809 signal_anchor +0001810 PIP_box +0001811 phosphorylation_site +0001812 transmembrane_helix +0001813 vacuolar_sorting_signal +0001814 coding_variant_quality +0001815 synonymous +0001816 non_synonymous +0001817 inframe +0001818 
protein_altering_variant +0001819 synonymous_variant +0001820 inframe_indel +0001821 inframe_insertion +0001822 inframe_deletion +0001823 conservative_inframe_insertion +0001824 disruptive_inframe_insertion +0001825 conservative_inframe_deletion +0001826 disruptive_inframe_deletion +0001827 mRNA_read +0001828 genomic_DNA_read +0001829 mRNA_contig +0001830 AFLP_fragment +0001831 protein_hmm_match +0001832 immunoglobulin_region +0001833 V_region +0001834 C_region +0001835 N_region +0001836 S_region +0001837 mobile_element_insertion +0001838 novel_sequence_insertion +0001839 CSL_response_element +0001840 GATA_box +0001841 polymorphic_pseudogene +0001842 AP_1_binding_site +0001843 CRE +0001844 CuRE +0001845 DRE +0001846 FLEX_element +0001847 forkhead_motif +0001848 homol_D_box +0001849 homol_E_box +0001850 HSE +0001851 iron_repressed_GATA_element +0001852 mating_type_M_box +0001853 androgen_response_element +0001854 smFISH_probe +0001855 MCB +0001856 CCAAT_motif +0001857 Ace2_UAS +0001858 TR_box +0001859 STREP_motif +0001860 rDNA_intergenic_spacer_element +0001861 sterol_regulatory_element +0001862 GT_dinucleotide_repeat +0001863 GTT_trinucleotide_repeat +0001864 Sap1_recognition_motif +0001865 CDRE_motif +0001866 BAC_read_contig +0001867 candidate_gene +0001868 positional_candidate_gene +0001869 functional_candidate_gene +0001870 enhancerRNA +0001871 PCB +0001872 rearrangement_region +0001873 interchromosomal_breakpoint +0001874 intrachromosomal_breakpoint +0001875 unassigned_supercontig +0001876 partial_genomic_sequence_assembly +0001877 lncRNA +0001878 feature_variant +0001879 feature_ablation +0001880 feature_amplification +0001881 feature_translocation +0001882 feature_fusion +0001883 transcript_translocation +0001884 regulatory_region_translocation +0001885 TFBS_translocation +0001886 transcript_fusion +0001887 regulatory_region_fusion +0001888 TFBS_fusion +0001889 transcript_amplification +0001890 transcript_regulatory_region_fusion +0001891 
regulatory_region_amplification +0001892 TFBS_amplification +0001893 transcript_ablation +0001894 regulatory_region_ablation +0001895 TFBS_ablation +0001896 transposable_element_CDS +0001897 transposable_element_pseudogene +0001898 dg_repeat +0001899 dh_repeat +0001901 AACCCT_box +0001902 splice_region +0001904 antisense_lncRNA +0001905 regional_centromere_outer_repeat_transcript +0001906 feature_truncation +0001907 feature_elongation +0001908 internal_feature_elongation +0001909 frameshift_elongation +0001910 frameshift_truncation +0001911 copy_number_increase +0001912 copy_number_decrease +0001913 bacterial_RNApol_promoter_sigma_ecf_element +0001914 rDNA_replication_fork_barrier +0001915 transcription_start_cluster +0001916 CAGE_tag +0001917 CAGE_cluster +0001918 5_methylcytosine +0001919 4_methylcytosine +0001920 N6_methyladenine +0001921 mitochondrial_contig +0001922 mitochondrial_supercontig +0001923 TERRA +0001924 ARRET +0001925 ARIA +0001926 anti_ARRET +0001927 telomeric_transcript +0001928 distal_duplication +0001929 mitochondrial_DNA_read +0001930 chloroplast_DNA_read +0001931 consensus_gDNA +0001932 restriction_enzyme_five_prime_single_strand_overhang +0001933 restriction_enzyme_three_prime_single_strand_overhang +0001934 monomeric_repeat +0001935 H3K20_trimethylation_site +0001936 H3K36_acetylation_site +0001937 H2BK12_acetylation_site +0001938 H2AK5_acetylation_site +0001939 H4K12_acetylation_site +0001940 H2BK120_acetylation_site +0001941 H4K91_acetylation_site +0001942 H2BK20_acetylation_site +0001943 H3K4_acetylation_site +0001944 H2AK9_acetylation_site +0001945 H3K56_acetylation_site +0001946 H2BK15_acetylation_site +0001947 H3R2_monomethylation_site +0001948 H3R2_dimethylation_site +0001949 H4R3_dimethylation_site +0001950 H4K4_trimethylation_site +0001951 H3K23_dimethylation_site +0001952 promoter_flanking_region +0001953 restriction_enzyme_assembly_scar +0001954 restriction_enzyme_region +0001955 protein_stability_element +0001956 protease_site 
+0001957 RNA_stability_element +0001958 lariat_intron +0001959 TCT_motif +0001960 5_hydroxymethylcytosine +0001961 5_formylcytosine +0001962 modified_adenine +0001963 modified_cytosine +0001964 modified_guanine +0001965 8_oxoguanine +0001966 5_carboxylcytosine +0001967 8_oxoadenine +0001968 coding_transcript_variant +0001969 coding_transcript_intron_variant +0001970 non_coding_transcript_intron_variant +0001971 zinc_finger_binding_site +0001972 histone_4_acetylation_site +0001973 histone_3_acetylation_site +0001974 CTCF_binding_site +0001975 five_prime_sticky_end_restriction_enzyme_cleavage_site +0001976 three_prime_sticky_end_restriction_enzyme_cleavage_site +0001977 ribonuclease_site +0001978 signature +0001979 RNA_stability_element +0001980 G_box +0001981 L_box +0001982 I-box +0001983 5_prime_UTR_premature_start_codon_variant +0001984 silent_mating_type_cassette_array +0001985 Okazaki_fragment +0001986 upstream_transcript_variant +0001987 downstream_transcript_variant +0001988 5_prime_UTR_premature_start_codon_gain_variant +0001989 5_prime_UTR_premature_start_codon_loss_variant +0001990 five_prime_UTR_premature_start_codon_location_variant +0001991 consensus_AFLP_fragment +0001992 nonsynonymous_variant +0001993 extended_cis_splice_site +0001994 intron_base_5 +0001995 extended_intronic_splice_region_variant +0001996 extended_intronic_splice_region +0001997 subtelomere +0001998 sgRNA +0001999 mating_type_region_motif +0002001 Y_region +0002002 Z1_region +0002003 Z2_region +0002004 ARS_consensus_sequence +0002005 DSR_motif +0002006 zinc_repressed_element +0002007 MNV +0002008 rare_amino_acid_variant +0002009 selenocysteine_loss +0002010 pyrrolysine_loss +0002011 intragenic_variant +0002012 start_lost +0002013 5_prime_UTR_truncation +0002014 5_prime_UTR_elongation +0002015 3_prime_UTR_truncation +0002016 3_prime_UTR_elongation +0002017 conserved_intergenic_variant +0002018 conserved_intron_variant +0002019 start_retained_variant +0002020 boundary_element +0002021 
mating_type_region_replication_fork_barrier +0002022 priRNA +0002023 multiplexing_sequence_identifier +0002024 W_region +0002025 cis_acting_homologous_chromosome_pairing_region +0002026 intein_encoding_region +0002027 uORF +0002028 sORF +0002029 tnaORF +0002030 X_region +0002031 shRNA +0002032 moR +0002033 loR +0002034 miR_encoding_snoRNA_primary_transcript +0002035 lncRNA_primary_transcript +0002036 miR_encoding_lncRNA_primary_transcript +0002037 miR_encoding_tRNA_primary_transcript +0002038 shRNA_primary_transcript +0002039 miR_encoding_shRNA_primary_transcript +0002040 vaultRNA_primary_transcript +0002041 miR_encoding_vaultRNA_primary_transcript +0002042 Y_RNA_primary_transcript +0002043 miR_encoding_Y_RNA_primary_transcript +0002044 TCS_element +0002045 pheromone_response_element +0002046 FRE +0002047 transcription_pause_site +0002048 disabled_reading_frame +0002049 H3K27_acetylation_site +0002050 constitutive_promoter +0002051 inducible_promoter +0002052 dominant_negative_variant +0002053 gain_of_function_variant +0002054 loss_of_function_variant +0002055 null_mutation +0002056 intronic_splicing_silencer +0002057 intronic_splicing_enhancer +0002058 exonic_splicing_silencer +0002059 recombination_enhancer +0002060 interchromosomal_translocation +0002061 intrachromosomal_translocation +0002062 complex_chromosomal_rearrangement +0002063 Alu_insertion +0002064 LINE1_insertion +0002065 SVA_insertion +0002066 mobile_element_deletion +0002067 HERV_deletion +0002068 SVA_deletion +0002069 LINE1_deletion +0002070 Alu_deletion +0002071 CDS_supported_by_peptide_spectrum_match +0002072 sequence_comparison +0002073 no_sequence_alteration +0002074 intergenic_1kb_variant +0002075 incomplete_transcript_variant +0002076 incomplete_transcript_3UTR_variant +0002077 incomplete_transcript_5UTR_variant +0002078 incomplete_transcript_intronic_variant +0002079 incomplete_transcript_splice_region_variant +0002080 incomplete_transcript_exonic_variant +0002081 incomplete_transcript_CDS 
+0002082 incomplete_transcript_coding_splice_variant +0002083 2KB_downstream_variant +0002084 exonic_splice_region_variant +0002085 unidirectional_gene_fusion +0002086 bidirectional_gene_fusion +0002087 pseudogenic_CDS +0002088 non_coding_transcript_splice_region_variant +0002089 3_prime_UTR_exon_variant +0002090 3_prime_UTR_intron_variant +0002091 5_prime_UTR_intron_variant +0002092 5_prime_UTR_exon_variant +0002093 structural_interaction_variant +0002094 non_allelic_homologous_recombination_region +0002095 scaRNA +0002096 short_tandem_repeat_variation +0002097 vertebrate_immune_system_pseudogene +0002098 immunoglobulin_pseudogene +0002099 T_cell_receptor_pseudogene +0002100 IG_C_pseudogene +0002101 IG_J_pseudogene +0002102 IG_V_pseudogene +0002103 TR_V_pseudogene +0002104 TR_J_pseudogene +0002105 translated_processed_pseudogene +0002106 translated_unprocessed_pseudogene +0002107 transcribed_unprocessed_pseudogene +0002108 transcribed_unitary_pseudogene +0002109 transcribed_processed_pseudogene +0002110 polymorphic_pseudogene_with_retained_intron +0002111 pseudogene_processed_transcript +0002112 coding_transcript_with_retained_intron +0002113 lncRNA_with_retained_intron +0002114 NMD_transcript +0002115 pseudogenic_transcript_with_retained_intron +0002116 polymorphic_pseudogene_processed_transcript +0002117 +0002118 NMD_polymorphic_pseudogene_transcript +0002119 allelic_frequency +0002120 three_prime_overlapping_ncrna +0002121 vertebrate_immune_system_gene +0002122 immunoglobulin_gene +0002123 IG_C_gene +0002124 IG_D_gene +0002125 IG_J_gene +0002126 IG_V_gene +0002127 lncRNA_gene +0002128 mt_rRNA +0002129 mt_tRNA +0002130 NSD_transcript +0002131 sense_intronic_lncRNA +0002132 sense_overlap_lncRNA +0002133 T_cell_receptor_gene +0002134 TR_C_Gene +0002135 TR_D_Gene +0002136 TR_J_Gene +0002137 TR_V_Gene +0002138 predicted_transcript +0002139 unconfirmed_transcript +0002140 early_origin_of_replication +0002141 late_origin_of_replication +0002142 
histone_2A_acetylation_site +0002143 histone_2B_acetylation_site +0002144 histone_2AZ_acetylation_site +0002145 H2AZK4_acetylation_site +0002146 H2AZK7_acetylation_site +0002147 H2AZK11_acetylation_site +0002148 H2AZK13_acetylation_site +0002149 H2AZK15_acetylation_site +0002150 AUG_initiated_uORF +0002151 non_AUG_initiated_uORF +0002152 genic_downstream_transcript_variant +0002153 genic_upstream_transcript_variant +0002154 mitotic_recombination_region +0002155 meiotic_recombination_region +0002156 CArG_box +0002157 Mat2P +0002158 Mat3M +0002159 SHP_box +0002160 sequence_length_variant +0002161 short_tandem_repeat_change +0002162 short_tandem_repeat_expansion +0002163 short_tandem_repeat_contraction +0002164 H2BK5_acetylation_site +0002165 trinucleotide_repeat_expansion +0002166 ref_miRNA +0002167 isomiR +0002168 RNA_thermometer +0002169 splice_polypyrimidine_tract_variant +0002170 splice_donor_region_variant +0002171 telomeric_D_loop +0002172 sequence_alteration_artifact +0002173 indel_artifact +0002174 deletion_artifact +0002175 insertion_artifact +0002176 substitution_artifact +0002177 duplication_artifact +0002178 SNV_artifact +0002179 MNV_artifact +0002180 enzymatic_RNA_gene +0002181 ribozyme_gene +0002182 antisense_lncRNA_gene +0002183 sense_overlap_lncRNA_gene +0002184 sense_intronic_lncRNA_gene +0002185 bidirectional_promoter_lncRNA_gene +0002186 mutational_hotspot +0002187 HERV_insertion +0002188 functional_gene_region +0002189 allelic_pseudogene +0002190 enhancer_blocking_element +0002191 imprinting_control_region +0002192 flanking_repeat +0002193 processed_pseudogenic_rRNA +0002194 unprocessed_pseudogenic_rRNA +0002195 unitary_pseudogenic_rRNA +0002196 allelic_pseudogenic_rRNA +0002197 processed_pseudogenic_tRNA +0002198 unprocessed_pseudogenic_tRNA +0002199 unitary_pseudogenic_tRNA +0002200 allelic_pseudogenic_tRNA +0002201 terminal_repeat +0002202 repeat_instability_region +0002203 replication_start_site +0002204 nucleotide_cleavage_site +0002205 
response_element +0002206 sequence_source +0002207 UNAAAC_motif +0002208 long_terminal_repeat_transcript +0002209 genomic_DNA_contig +0002210 presence_absence_variation +0002211 circular_plasmid +0002212 linear_plasmid +0002213 transcription_termination_signal +0002214 redundant_inserted_stop_gained +0002215 Zas1_recognition_motif +0002216 Pho7_binding_site +0002217 unspecified_indel +0002218 functionally_abnormal +0002219 functionally_normal +0002220 function_uncertain_variant +0002221 eukaryotic_promoter +0002222 prokaryotic_promoter +0002223 inert_DNA_spacer +0002224 2A_self_cleaving_peptide_region +0002225 LOZ1_response_element +0002226 group_IIC_intron +0002227 CDS_extension +0002228 CDS_five_prime_extension +0002229 CDS_three_prime_extension +0002230 CAAX_box +0002231 self_cleaving_ribozyme +0002232 selection_marker +0002233 homologous_chromosome_recognition_and_pairing_locus +0002234 pumilio_response_element +0002235 SUMO_interaction_motif +0002236 cytosolic_rRNA_18S_gene +0002237 cytosolic_rRNA_16S_gene +0002238 cytosolic_rRNA_5S_gene +0002239 cytosolic_rRNA_28S_gene +0002240 cytosolic_rRNA_5_8S_gene +0002241 rRNA_21S_gene +0002242 cytosolic_rRNA_25S_gene +0002243 cytosolic_rRNA_23S_gene +0002244 partially_duplicated_transcript +0002245 five_prime_duplicated_transcript +0002246 three_prime_duplicated_transcript +0002247 sncRNA +0002248 spurious_protein +0002249 mature_protein_region_of_CDS +0002250 propeptide_region_of_CDS +0002251 signal_peptide_region_of_CDS +0002252 transit_peptide_region_of_CDS +0002253 stem_loop_region +0002254 loop +0002255 stem +0002256 non_complimentary_stem +0002257 knob +0002258 teb1_recognition_motif +0002259 polyA_site_cluster +0002260 LARD +0002261 TRIM +0002262 Watson_strand +0002263 Crick_strand +0002264 Copia_LTR_retrotransposon +0002265 Gypsy_LTR_retrotransposon +0002266 Bel_Pao_LTR_retrotransposon +0002267 Retrovirus_LTR_retrotransposon +0002268 Endogenous_Retrovirus_LTR_retrotransposon +0002269 R2_LINE_retrotransposon 
+0002270 RTE_LINE_retrotransposon +0002271 Jockey_LINE_retrotransposon +0002272 L1_LINE_retrotransposon +0002273 I_LINE_retrotransposon +0002274 tRNA_SINE_retrotransposon +0002275 7SL_SINE_retrotransposon +0002276 5S_SINE_retrotransposon +0002277 Crypton_YR_transposon +0002278 Tc1_Mariner_TIR_transposon +0002279 hAT_TIR_transposon +0002280 Mutator_TIR_transposon +0002281 Merlin_TIR_transposon +0002282 Transib_TIR_transposon +0002283 piggyBac_TIR_transposon +0002284 PIF_Harbinger_TIR_transposon +0002285 CACTA_TIR_transposon +0002286 YR_retrotransposon +0002287 DIRS_YR_retrotransposon +0002288 Ngaro_YR_retrotransposon +0002289 Viper_YR_retrotransposon +0002290 Penelope_retrotransposon +0002291 circular_ncRNA +0002292 circular_mRNA +0002293 mitochondrial_control_region +0002294 mitochondrial_D_loop +0002295 transcription_factor_regulatory_site +0002296 TFRS_module +0002297 TFRS_collection +0002298 simple_operon +0002299 complex_operon +0002300 unit_of_gene_expression +0002301 transcription_unit +0002302 simple_regulon +0002303 complex_regulon +0002304 topologically_associated_domain +0002305 topologically_associated_domain_boundary +0002306 chromatin_regulatory_region +0002307 DNA_loop +0002308 DNA_loop_anchor +0002309 core_promoter_element +0002310 cryptic_promoter +0002311 viral_promoter +0002312 core_prokaryotic_promoter_element +0002313 core_viral_promoter_element +0002314 altered_gene_product_level +0002315 increased_gene_product_level +0002316 decreased_gene_product_level +0002317 absent_gene_product +0002318 altered_gene_product_sequence +0002319 NMD_triggering_variant +0002320 NMD_escaping_variant +0002321 stop_gained_NMD_triggering +0002322 stop_gained_NMD_escaping +0002323 frameshift_variant_NMD_triggering +0002324 frameshift_variant_NMD_escaping +0002325 splice_donor_variant_NMD_triggering +0002326 splice_donor_variant_NMD_escaping +0002327 splice_acceptor_variant_NMD_triggering +0002328 splice_acceptor_variant_NMD_escaping +0002329 
minus_1_translational_frameshift +0002330 minus_2_translational_frameshift +0002331 accessible_DNA_region +0002332 epigenomically_modified_region +0002333 amber_stop_codon +0002334 ochre_stop_codon +0002335 opal_stop_codon +0002336 cytosolic_rRNA_2S_gene +0002337 cytosolic_2S_rRNA +0002338 U7_snRNA +0002339 scaRNA_gene +0002340 RNA_7SK +0002341 RNA_7SK_gene +0002342 sncRNA_gene +0002343 cytosolic_rRNA +0002344 mt_SSU_rRNA +0002345 mt_LSU_rRNA +0002346 plastid_rRNA +0002347 plastid_SSU_rRNA +0002348 plastid_LSU_rRNA +0002349 fragile_site +0002350 common_fragile_site +0002351 rare_fragile_site +0002352 sisRNA +0002353 sbRNA_gene +0002354 sbRNA +0002355 hpRNA_gene +0002356 hpRNA +0002357 biosynthetic_gene_cluster +0002358 vault_RNA_gene +0002359 Y_RNA_gene +0002360 cytosolic_rRNA_gene +0002361 cytosolic_LSU_rRNA_gene +0002362 cytosolic_SSU_rRNA_gene +0002363 mt_rRNA_gene +0002364 mt_LSU_rRNA_gene +0002365 mt_SSU_rRNA_gene +0002366 plastid_rRNA_gene +0002367 plastid_LSU_rRNA_gene +0002368 plastid_SSU_rRNA_gene +0002369 C_D_box_scaRNA +0002370 H_ACA_box_scaRNA +0002371 C-D_H_ACA_box_scaRNA +0002372 C_D_box_scaRNA_gene +0002373 H_ACA_box_scaRNA_gene +0002374 C-D_H_ACA_box_scaRNA_gene +0002375 C_D_box_snoRNA_gene +0002376 H_ACA_box_snoRNA_gene +0002377 U14_snoRNA_gene +0002378 U3_snoRNA_gene +0002379 methylation_guide_snoRNA_gene +0002380 pseudouridylation_guide_snoRNA_gene +0002381 bidirectional_promoter_lncRNA +0002382 range_extender_element +00000000002382 5_prime_UTR_uORF_variant +0002383 oncogenic_variant +0005836 regulatory_region +0005837 U14_snoRNA_primary_transcript +0005841 methylation_guide_snoRNA +0005843 rRNA_cleavage_RNA +0005845 exon_of_single_exon_gene +0005847 cassette_array_member +0005848 gene_cassette_member +0005849 gene_subarray_member +0005850 primer_binding_site +0005851 gene_array +0005852 gene_subarray +0005853 gene_cassette +0005854 gene_cassette_array +0005855 gene_group +0005856 selenocysteine_tRNA_primary_transcript +0005857 
selenocysteinyl_tRNA +0005858 syntenic_region +0100001 biochemical_region_of_peptide +0100002 molecular_contact_region +0100003 intrinsically_unstructured_polypeptide_region +0100004 catmat_left_handed_three +0100005 catmat_left_handed_four +0100006 catmat_right_handed_three +0100007 catmat_right_handed_four +0100008 alpha_beta_motif +0100009 lipoprotein_signal_peptide +0100010 no_output +0100011 cleaved_peptide_region +0100012 peptide_coil +0100013 hydrophobic_region_of_peptide +0100014 n_terminal_region +0100015 c_terminal_region +0100016 central_hydrophobic_region_of_signal_peptide +0100017 polypeptide_conserved_motif +0100018 polypeptide_binding_motif +0100019 polypeptide_catalytic_motif +0100020 polypeptide_DNA_contact +0100021 polypeptide_conserved_region +1000002 substitution +1000005 complex_substitution +1000008 point_mutation +1000009 transition +1000010 pyrimidine_transition +1000011 C_to_T_transition +1000012 C_to_T_transition_at_pCpG_site +1000013 T_to_C_transition +1000014 purine_transition +1000015 A_to_G_transition +1000016 G_to_A_transition +1000017 transversion +1000018 pyrimidine_to_purine_transversion +1000019 C_to_A_transversion +1000020 C_to_G_transversion +1000021 T_to_A_transversion +1000022 T_to_G_transversion +1000023 purine_to_pyrimidine_transversion +1000024 A_to_C_transversion +1000025 A_to_T_transversion +1000026 G_to_C_transversion +1000027 G_to_T_transversion +1000028 intrachromosomal_mutation +1000029 chromosomal_deletion +1000030 chromosomal_inversion +1000031 interchromosomal_mutation +1000032 delins +1000035 duplication +1000036 inversion +1000037 chromosomal_duplication +1000038 intrachromosomal_duplication +1000039 direct_tandem_duplication +1000040 inverted_tandem_duplication +1000041 intrachromosomal_transposition +1000042 compound_chromosome +1000043 Robertsonian_fusion +1000044 chromosomal_translocation +1000045 ring_chromosome +1000046 pericentric_inversion +1000047 paracentric_inversion +1000048 
reciprocal_chromosomal_translocation +1000049 sequence_variation_affecting_transcript +1000050 sequence_variant_causing_no_change_in_transcript +1000054 sequence_variation_affecting_coding_sequence +1000055 sequence_variant_causing_initiator_codon_change_in_transcript +1000056 sequence_variant_causing_amino_acid_coding_codon_change_in_transcript +1000057 sequence_variant_causing_synonymous_codon_change_in_transcript +1000058 sequence_variant_causing_non_synonymous_codon_change_in_transcript +1000059 sequence_variant_causing_missense_codon_change_in_transcript +1000060 sequence_variant_causing_conservative_missense_codon_change_in_transcript +1000061 sequence_variant_causing_nonconservative_missense_codon_change_in_transcript +1000062 sequence_variant_causing_nonsense_codon_change_in_transcript +1000063 sequence_variant_causing_terminator_codon_change_in_transcript +1000064 sequence_variation_affecting_reading_frame +1000065 frameshift_sequence_variation +1000066 sequence_variant_causing_plus_1_frameshift_mutation +1000067 sequence_variant_causing_minus_1_frameshift +1000068 sequence_variant_causing_plus_2_frameshift +1000069 sequence_variant_causing_minus_2_frameshift +1000070 sequence_variant_affecting_transcript_processing +1000071 sequence_variant_affecting_splicing +1000072 sequence_variant_affecting_splice_donor +1000073 sequence_variant_affecting_splice_acceptor +1000074 sequence_variant_causing_cryptic_splice_activation +1000075 sequence_variant_affecting_editing +1000076 sequence_variant_affecting_transcription +1000078 sequence_variant_decreasing_rate_of_transcription +1000079 sequence_variation_affecting_transcript_sequence +1000080 sequence_variant_increasing_rate_of_transcription +1000081 sequence_variant_affecting_rate_of_transcription +1000082 sequence variant_affecting_transcript_stability +1000083 sequence_variant_increasing_transcript_stability +1000084 sequence_variant_decreasing_transcript_stability +1000085 
sequence_variation_affecting_level_of_transcript +1000086 sequence_variation_decreasing_level_of_transcript +1000087 sequence_variation_increasing_level_of_transcript +1000088 sequence_variant_affecting_translational_product +1000089 sequence_variant_causing_no_change_of_translational_product +1000092 sequence_variant_causing_complex_change_of_translational_product +1000093 sequence_variant_causing_amino_acid_substitution +1000094 sequence_variant_causing_conservative_amino_acid_substitution +1000095 sequence_variant_causing_nonconservative_amino_acid_substitution +1000096 sequence_variant_causing_amino_acid_insertion +1000097 sequence_variant_causing_amino_acid_deletion +1000098 sequence_variant_causing_polypeptide_truncation +1000099 sequence_variant_causing_polypeptide_elongation +1000100 mutation_causing_polypeptide_N_terminal_elongation +1000101 mutation_causing_polypeptide_C_terminal_elongation +1000102 sequence_variant_affecting_level_of_translational_product +1000103 sequence_variant_decreasing_level_of_translation_product +1000104 sequence_variant_increasing_level_of_translation_product +1000105 sequence_variant_affecting_polypeptide_amino_acid_sequence +1000106 mutation_causing_inframe_polypeptide_N_terminal_elongation +1000107 mutation_causing_out_of_frame_polypeptide_N_terminal_elongation +1000108 mutaton_causing_inframe_polypeptide_C_terminal_elongation +1000109 mutation_causing_out_of_frame_polypeptide_C_terminal_elongation +1000110 frame_restoring_sequence_variant +1000111 sequence_variant_affecting_3D_structure_of_polypeptide +1000112 sequence_variant_causing_no_3D_structural_change +1000115 sequence_variant_causing_complex_3D_structural_change +1000116 sequence_variant_causing_conformational_change +1000117 sequence_variant_affecting_polypeptide_function +1000118 sequence_variant_causing_loss_of_function_of_polypeptide +1000119 sequence_variant_causing_inactive_ligand_binding_site +1000120 sequence_variant_causing_inactive_catalytic_site +1000121 
sequence_variant_causing_polypeptide_localization_change +1000122 sequence_variant_causing_polypeptide_post_translational_processing_change +1000123 polypeptide_post_translational_processing_affected +1000124 sequence_variant_causing_partial_loss_of_function_of_polypeptide +1000125 sequence_variant_causing_gain_of_function_of_polypeptide +1000126 sequence_variant_affecting_transcript_secondary_structure +1000127 sequence_variant_causing_compensatory_transcript_secondary_structure_mutation +1000132 sequence_variant_effect +1000134 sequence_variant_causing_polypeptide_fusion +1000136 autosynaptic_chromosome +1000138 homo_compound_chromosome +1000140 hetero_compound_chromosome +1000141 chromosome_fission +1000142 dextrosynaptic_chromosome +1000143 laevosynaptic_chromosome +1000144 free_duplication +1000145 free_ring_duplication +1000147 deficient_translocation +1000148 inversion_cum_translocation +1000149 bipartite_duplication +1000150 cyclic_translocation +1000151 bipartite_inversion +1000152 uninverted_insertional_duplication +1000153 inverted_insertional_duplication +1000154 insertional_duplication +1000155 interchromosomal_transposition +1000156 inverted_interchromosomal_transposition +1000157 uninverted_interchromosomal_transposition +1000158 inverted_intrachromosomal_transposition +1000159 uninverted_intrachromosomal_transposition +1000160 unoriented_insertional_duplication +1000161 unoriented_interchromosomal_transposition +1000162 unoriented_intrachromosomal_transposition +1000170 uncharacterized_chromosomal_mutation +1000171 deficient_inversion +1000173 tandem_duplication +1000175 partially_characterized_chromosomal_mutation +1000180 sequence_variant_affecting_gene_structure +1000181 sequence_variant_causing_gene_fusion +1000182 chromosome_number_variation +1000183 chromosome_structure_variation +1000184 sequence_variant_causes_exon_loss +1000185 sequence_variant_causes_intron_gain +1000186 sequence_variant_causing_cryptic_splice_donor_activation +1001186 
sequence_variant_causing_cryptic_splice_acceptor_activation +1001187 alternatively_spliced_transcript +1001188 encodes_1_polypeptide +1001189 encodes_greater_than_1_polypeptide +1001190 encodes_different_polypeptides_different_stop +1001191 encodes_overlapping_peptides_different_start +1001192 encodes_disjoint_polypeptides +1001193 encodes_overlapping_polypeptides_different_start_and_stop +1001194 alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping +1001195 encodes_overlapping_peptides +1001196 cryptogene +1001197 dicistronic_primary_transcript +1001217 member_of_regulon +1001244 alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping +1001246 CDS_independently_known +1001247 orphan_CDS +1001249 CDS_supported_by_domain_match_data +1001251 CDS_supported_by_sequence_similarity_data +1001254 CDS_predicted +1001255 status_of_coding_sequence +1001259 CDS_supported_by_EST_or_cDNA_data +1001260 internal_Shine_Dalgarno_sequence +1001261 recoded_mRNA +1001262 minus_1_translationally_frameshifted +1001263 plus_1_translationally_frameshifted +1001264 mRNA_recoded_by_translational_bypass +1001265 mRNA_recoded_by_codon_redefinition +1001266 stop_codon_redefinition_as_selenocysteine +1001267 stop_codon_readthrough +1001268 recoding_stimulatory_region +1001269 four_bp_start_codon +1001270 stop_codon_redefinition_as_pyrrolysine +1001271 archaeal_intron +1001272 tRNA_intron +1001273 CTG_start_codon +1001274 SECIS_element +1001275 retron +1001277 three_prime_recoding_site +1001279 three_prime_stem_loop_structure +1001280 five_prime_recoding_site +1001281 flanking_three_prime_quadruplet_recoding_signal +1001282 UAG_stop_codon_signal +1001283 UAA_stop_codon_signal +1001284 regulon +1001285 UGA_stop_codon_signal +1001286 three_prime_repeat_recoding_signal +1001287 distant_three_prime_recoding_signal +1001288 stop_codon_signal +2000061 databank_entry +3000000 
gene_segment +00000010002382 5_prime_UTR_uORF_stop_codon_variant +00000020002382 5_prime_UTR_uORF_frameshift_variant +00000030002382 5_prime_UTR_uORF_stop_codon_gain_variant +00000040002382 5_prime_UTR_uORF_stop_codon_loss_variant diff --git a/src/pyobo/sources/flybase.py b/src/pyobo/sources/flybase.py index 0ebfb028..a088602e 100644 --- a/src/pyobo/sources/flybase.py +++ b/src/pyobo/sources/flybase.py @@ -7,6 +7,7 @@ from tqdm.auto import tqdm from pyobo import Reference +from pyobo.resources.so import get_so_name from pyobo.struct import Obo, Term, from_species, orthologous from pyobo.utils.io import multisetdict from pyobo.utils.path import ensure_df @@ -133,7 +134,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: "FlyBase gene type is missing mapping to Sequence Ontology (SO): %s", gtype ) else: - so[gtype] = Reference.auto("SO", so_id) + so[gtype] = Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)) for _, reference in sorted(so.items()): yield Term(reference=reference) diff --git a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py index 61b1db86..bd47eef6 100644 --- a/src/pyobo/sources/hgnc.py +++ b/src/pyobo/sources/hgnc.py @@ -13,6 +13,7 @@ from tqdm.auto import tqdm from pyobo.api.utils import get_version +from pyobo.resources.so import get_so_name from pyobo.struct import ( Obo, Reference, @@ -222,7 +223,7 @@ class HGNCGetter(Obo): alias_symbol_type, ] root_terms = [ - Reference(prefix="so", identifier=so_id) + Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)) for so_id in sorted(set(LOCUS_TYPE_TO_SO.values())) if so_id ] @@ -256,7 +257,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te yield Term.from_triple("NCBITaxon", "9606", "Homo sapiens") yield from sorted( { - Term(reference=Reference.auto("SO", so_id)) + Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))) for so_id in sorted(LOCUS_TYPE_TO_SO.values()) if so_id }, @@ -418,9 
+419,11 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te locus_group = entry.pop("locus_group") so_id = LOCUS_TYPE_TO_SO.get(locus_type) if so_id: - term.append_parent(Reference.auto("SO", so_id)) + term.append_parent(Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))) else: - term.append_parent(Reference.auto("SO", "0000704")) # gene + term.append_parent( + Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704")) + ) # gene unhandle_locus_types[locus_type][identifier] = term term.append_property("locus_type", locus_type) term.append_property("locus_group", locus_group) diff --git a/src/pyobo/sources/pombase.py b/src/pyobo/sources/pombase.py index 776de7e2..98052c0f 100644 --- a/src/pyobo/sources/pombase.py +++ b/src/pyobo/sources/pombase.py @@ -9,6 +9,7 @@ import pyobo from pyobo import Reference +from pyobo.resources.so import get_so_name from pyobo.struct import Obo, Term, from_species, has_gene_product, orthologous from pyobo.utils.path import ensure_df @@ -70,7 +71,9 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: df = ensure_df(PREFIX, url=GENE_NAMES_URL, force=force, version=version) so = { - gtype: Reference.auto("SO", POMBASE_TO_SO[gtype]) + gtype: Reference( + prefix="SO", identifier=POMBASE_TO_SO[gtype], name=get_so_name(POMBASE_TO_SO[gtype]) + ) for gtype in sorted(df[df.columns[6]].unique()) } for _, reference in sorted(so.items()): diff --git a/src/pyobo/sources/zfin.py b/src/pyobo/sources/zfin.py index 39451f88..1c745334 100644 --- a/src/pyobo/sources/zfin.py +++ b/src/pyobo/sources/zfin.py @@ -7,6 +7,7 @@ from tqdm.auto import tqdm +from pyobo.resources.so import get_so_name from pyobo.struct import ( Obo, Reference, @@ -113,7 +114,9 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te ) df["sequence_ontology_id"] = df["sequence_ontology_id"].map(lambda x: x[len("SO:") :]) so = { - sequence_ontology_id: Reference.auto(prefix="SO", 
identifier=sequence_ontology_id) + sequence_ontology_id: Reference( + prefix="SO", identifier=sequence_ontology_id, name=get_so_name(sequence_ontology_id) + ) for sequence_ontology_id in df["sequence_ontology_id"].unique() } for _, reference in sorted(so.items()):