Skip to content

Commit d6a9a35

Browse files
committed
check if organisms already exist in s3 dir
1 parent 8e51b2e commit d6a9a35

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

src/uniprot2s3/main.py

+5
Original file line number | Diff line number | Diff line change
@@ -299,6 +299,11 @@ def run_uniprot_api_parallel(
299299
# Cache HTTP requests to avoid repeated calls
300300
# requests_cache.install_cache("uniprot_cache")
301301
organism_list = get_organism_list(input_dir=input_dir)
302+
# See which organisms have already been downloaded
303+
existing_organism_ids = os.listdir(UNIPROT_S3_DIR)
304+
existing_organism_ids = [file for file in existing_organism_ids if file.endswith('.tsv')]
305+
existing_organism_ids = [file.replace('.tsv','') for file in existing_organism_ids]
306+
organism_list = list(set(organism_list).difference(set(existing_organism_ids)))
302307

303308
# Sort list
304309
taxa_id_common_with_proteomes_list = list(set(organism_list).intersection(taxa_id_from_proteomes_list))

0 commit comments

Comments
 (0)