Skip to content

Commit

Permalink
Merge pull request PGScatalog#328 from ens-lgil/hotfix/files_copy
Browse files Browse the repository at this point in the history
Fix issue with permission to copy files and add a new parameter 'username'
  • Loading branch information
ens-lgil authored Jan 31, 2024
2 parents f5a14f3 + 3af611b commit e25dde2
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 38 deletions.
37 changes: 24 additions & 13 deletions release/scripts/CopyHarmonizedScoringFilesPOS.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import sys, os, shutil, stat, glob, re
import sys, os, shutil, stat, glob, re, pwd
from os import path
import requests
import argparse
Expand All @@ -18,12 +18,13 @@ class CopyHarmonizedScoringFilesPOS:
for gb in genebuilds:
log_msg[type][gb] = []

def __init__(self, new_ftp_scores_dir, staged_harmonized_files_dir, harmonized_files_dir,md5_sql_filepath):
def __init__(self, new_ftp_scores_dir, staged_harmonized_files_dir, harmonized_files_dir, md5_sql_filepath, username):
self.new_ftp_scores_dir = new_ftp_scores_dir
self.harmonized_files_staged_dir = staged_harmonized_files_dir
self.harmonized_files_prod_dir = harmonized_files_dir
self.scores_list_file_path = new_ftp_scores_dir+'/'+self.scores_list_file
self.md5_sql_filepath = md5_sql_filepath
self.username = username

if not os.path.exists(new_ftp_scores_dir):
print(f'Error: The path to the data directory can\'t be found ({new_ftp_scores_dir}).')
Expand Down Expand Up @@ -104,13 +105,23 @@ def copy_harmonized_files_to_production(self):
self.create_directory(f'{self.harmonized_files_prod_dir}/{pgs_id}/')
self.create_directory(harmonized_file_prod_dir)
if os.path.isdir(harmonized_file_prod_dir):
shutil.copy2(harmonized_file_staged, harmonized_file_prod)
# Change chmod to allow group write access
try:
os.chmod(harmonized_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH)
print(copy_msg)
except:
print(f">>>>> ERROR! Can't change the read/write access of the file '{harmonized_file}'!")
shutil.copyfile(harmonized_file_staged, harmonized_file_prod)
# If there is any permission issue
except PermissionError as e:
print(f'>>>>> ERROR! File \'{harmonized_file}\' (Permission issue) - {e}')
except IOError as e:
print(f'>>>>> ERROR! File \'{harmonized_file}\' couldn\'t be copied to production: "{self.harmonized_files_prod_dir}"!')
print(e)

# Change chmod to allow group write access
file_owner = pwd.getpwuid(os.stat(harmonized_file_prod).st_uid).pw_name
if self.username == file_owner:
try:
os.chmod(harmonized_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH)
print(copy_msg)
except:
print(f">>>>> ERROR! Can't change the read/write access of the file '{harmonized_file}'!")

file_info = { 'genebuild': gb, 'name': harmonized_file, 'status': copy_type }
if not pgs_id in self.harmonized_files_to_copy:
Expand Down Expand Up @@ -140,7 +151,7 @@ def copy_harmonized_files_to_metadata(self):
""" Copy the new/updated scoring files to the metadata directory (temporary FTP) """
print("\n***** Step 2 - Copy the new/updated scoring files to the metadata directory (temporary FTP) *****")

md5_sql_file = open(self.md5_sql_filepath,'a')
# md5_sql_file = open(self.md5_sql_filepath,'a')

for score_id in sorted(self.harmonized_files_to_copy.keys()):

Expand Down Expand Up @@ -169,11 +180,11 @@ def copy_harmonized_files_to_metadata(self):
shutil.copy2(harmonized_file_prod, harmonized_file_release)
self.log_msg[harmonized_status][harmonized_gb].append(score_id)

# md5 checksum SQL commands
sql_cmd = f"UPDATE {self.sql_table} SET hmpos_{harmonized_gb}_md5='{harmonized_file_md5}' WHERE score_id={id};\n"
md5_sql_file.write(sql_cmd)
# # md5 checksum SQL commands
# sql_cmd = f"UPDATE {self.sql_table} SET hmpos_{harmonized_gb}_md5='{harmonized_file_md5}' WHERE score_id={id};\n"
# md5_sql_file.write(sql_cmd)

md5_sql_file.close()
# md5_sql_file.close()

# Copied PGS Scoring files
self.print_log_msg('new', 'New PGS Scoring files')
Expand Down
41 changes: 23 additions & 18 deletions release/scripts/CopyScoringFiles.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import sys, os, shutil, stat, glob, re
import sys, os, shutil, stat, glob, re, pwd
from os import path
import requests
import argparse
Expand All @@ -10,18 +10,18 @@ class CopyScoringFiles:
ftp_std_scoringfile_suffix = '.txt.gz'
scores_list_file = 'pgs_scores_list.txt'
sql_table = 'catalog_scorefilemd5'

log_msg = {
'new': [],
'updated': [],
'skipped': []
}

def __init__(self, new_ftp_scores_dir, staged_scores_dir, scoring_files_dir,md5_sql_filepath):
def __init__(self, new_ftp_scores_dir, staged_scores_dir, scoring_files_dir, md5_sql_filepath, username):
self.new_ftp_scores_dir = new_ftp_scores_dir
self.new_scoringfiles_dir = staged_scores_dir
self.scoring_files_dir = scoring_files_dir
self.md5_sql_filepath = md5_sql_filepath
self.username = username

if not os.path.exists(new_ftp_scores_dir):
print(f'Error: The path to the data directory can\'t be found ({new_ftp_scores_dir}).')
Expand Down Expand Up @@ -128,23 +128,28 @@ def copy_scoring_files_to_production(self):
if copy_msg != '':
# Copy file
try:
shutil.copy2(scoring_file_ftp_priv, scoring_file_prod)
shutil.copyfile(scoring_file_ftp_priv, scoring_file_prod)
print(copy_msg)
if copy_type == 'update':
count_updated_pgs += 1
elif copy_type == 'new':
count_new_pgs += 1
else:
print(f'>>>>> ERROR! Can\'t determine whether the copy of \'{scoring_file}\' was due to the very first version of the scoring file or an updated version of the file')
# If there is any permission issue
except PermissionError as e:
print(f'>>>>> ERROR! File \'{scoring_file}\' (Permission issue) - {e}')
except IOError as e:
print(f'>>>>> ERROR! File \'{scoring_file}\' couldn\'t be copied to "{self.scoring_files_dir}"!')
print(f'>>>>> ERROR! File \'{scoring_file}\' couldn\'t be copied to production: "{self.scoring_files_dir}"!')
print(e)
# Change chmod to allow group write access
if os.path.isfile(scoring_file_prod):
try:
os.chmod(scoring_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH)
except:
print(f">>>>> ERROR! Can't change the read/write access of the file '{scoring_file}'!")
file_owner = pwd.getpwuid(os.stat(scoring_file_prod).st_uid).pw_name
if self.username == file_owner:
try:
os.chmod(scoring_file_prod, stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH)
except:
print(f">>>>> ERROR! Can't change the read/write access of the file '{scoring_file}'!")
total_count = count_new_pgs + count_updated_pgs
print(f'Number of PGS files successfully copied: {total_count} (New: {count_new_pgs} | Updated: {count_updated_pgs} | Skipped: {count_skipped_pgs})')

Expand All @@ -153,7 +158,7 @@ def copy_scoring_files_to_metadata(self):
""" Copy the new/updated scoring files to the metadata directory (temporary FTP) """
print("\n***** Step 2 - Copy the new/updated scoring files to the metadata directory (temporary FTP) *****")

md5_sql_file = open(self.md5_sql_filepath,'w')
# md5_sql_file = open(self.md5_sql_filepath,'w')

for score_id in sorted(os.listdir(self.new_ftp_scores_dir+'/scores/')):
score_release_dir = self.new_ftp_scores_dir+'/scores/'+score_id+'/ScoringFiles/'
Expand Down Expand Up @@ -211,15 +216,15 @@ def copy_scoring_files_to_metadata(self):
if not score_id in self.log_msg['updated']:
self.log_msg['new'].append(score_id)

# md5 checksum SQL commands
id = re.sub(r'PGS0+(.+)', r'\1', score_id)
if is_updated:
sql_cmd = f"UPDATE {self.sql_table} SET score_md5='{new_score_md5}' WHERE score_id={id};\n"
else:
sql_cmd = f"INSERT INTO {self.sql_table} (score_id,score_md5) VALUES ({id},'{new_score_md5}');\n"
md5_sql_file.write(sql_cmd)
# # md5 checksum SQL commands
# id = re.sub(r'PGS0+(.+)', r'\1', score_id)
# if is_updated:
# sql_cmd = f"UPDATE {self.sql_table} SET score_md5='{new_score_md5}' WHERE score_id={id};\n"
# else:
# sql_cmd = f"INSERT INTO {self.sql_table} (score_id,score_md5) VALUES ({id},'{new_score_md5}');\n"
# md5_sql_file.write(sql_cmd)

md5_sql_file.close()
# md5_sql_file.close()


# Copied PGS Scoring files
Expand Down
15 changes: 8 additions & 7 deletions release/scripts/run_copy_scoring_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
from release.scripts.CopyHarmonizedScoringFilesPOS import CopyHarmonizedScoringFilesPOS


def copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath):
def copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username):
print("\n#### Copy the new formatted scoring files ####")
pgs_scoring_files = CopyScoringFiles(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath)
pgs_scoring_files = CopyScoringFiles(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username)
pgs_scoring_files.get_previous_release()
pgs_scoring_files.get_list_of_scores()
pgs_scoring_files.copy_scoring_files_to_production()
pgs_scoring_files.copy_scoring_files_to_metadata()

def copy_hmpos_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath):
def copy_hmpos_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username):
print("\n#### Copy the new harmonized position scoring files ####")
pgs_harmonized_files = CopyHarmonizedScoringFilesPOS(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath)
pgs_harmonized_files = CopyHarmonizedScoringFilesPOS(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username)
pgs_harmonized_files.copy_harmonized_files_to_production()
pgs_harmonized_files.copy_harmonized_files_to_metadata()

Expand All @@ -42,7 +42,7 @@ def main():
argparser.add_argument("--scores_dir", type=str, help='The path to the scoring files directory (Production)', required=False)
argparser.add_argument("--hm_staged_scores_dir", type=str, help='The path to the harmonized Position staged files directory', required=True)
argparser.add_argument("--hm_scores_dir", type=str, help='The path to the harmonized scoring files directory (Production)', required=False)

argparser.add_argument("--username", type=str, help='Linux/Unix username', required=True)

args = argparser.parse_args()

Expand All @@ -51,16 +51,17 @@ def main():
scores_dir = args.scores_dir
hm_staged_scores_dir = args.hm_staged_scores_dir
hm_scores_dir = args.hm_scores_dir
username = args.username

release_date_file = f'{new_ftp_dir}/release_date.txt'
new_release_date = get_new_release_date(release_date_file)

md5_sql_filename = f'scores_md5_{new_release_date}.sql'
md5_sql_filepath = f'{new_ftp_dir}/{md5_sql_filename}'

copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath)
copy_scoring_files(new_ftp_dir,staged_scores_dir,scores_dir,md5_sql_filepath,username)

copy_hmpos_scoring_files(new_ftp_dir,hm_staged_scores_dir,hm_scores_dir,md5_sql_filepath)
copy_hmpos_scoring_files(new_ftp_dir,hm_staged_scores_dir,hm_scores_dir,md5_sql_filepath,username)

# Move/remove temporary files
if os.path.isfile(release_date_file):
Expand Down

0 comments on commit e25dde2

Please sign in to comment.