Skip to content

Commit

Permalink
Fixed issue in the AlleleCallEvaluator module when sample IDs were all numeric (column would be interpreted as int32).
Browse files Browse the repository at this point in the history
  • Loading branch information
rfm-targa committed Jul 2, 2024
1 parent cb4bea6 commit 0c2e187
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions CHEWBBACA/AlleleCallEvaluator/evaluate_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def main(input_files, schema_directory, output_directory, annotations,
[ct.RESULTS_STATISTICS_BASENAME])
sample_counts = pd.read_csv(sample_statistics_file, delimiter='\t')
# Convert values in FILE column to str to avoid issues with IDs that can be interpreted as int
sample_counts['FILE'] = sample_counts['FILE'].astype({'FILE': 'string'})
sample_counts['FILE'] = sample_counts['FILE'].astype('string')
# Sort based on decreasing number of EXC
sample_counts = sample_counts.sort_values(by=['EXC'], ascending=False)

Expand All @@ -266,7 +266,7 @@ def main(input_files, schema_directory, output_directory, annotations,
[ct.LOCI_STATS_BASENAME])
loci_counts = pd.read_csv(loci_statistics_file, delimiter='\t')
# Convert values in Locus column to str to avoid issues with IDs that can be interpreted as int
loci_counts['Locus'] = loci_counts['Locus'].astype({'Locus': 'string'})
loci_counts['Locus'] = loci_counts['Locus'].astype('string')
# Sort based on decreasing number of EXC
loci_counts = loci_counts.sort_values(by=['EXC'], ascending=False)

Expand Down Expand Up @@ -352,7 +352,7 @@ def main(input_files, schema_directory, output_directory, annotations,
profiles_matrix = pd.read_csv(allelic_profiles_file,
header=0, index_col=0,
sep='\t', low_memory=False)
# Convert values in Locus column to str to avoid issues with IDs that can be interpreted as int
# Convert Index values to str to avoid issues with IDs that can be interpreted as int
profiles_matrix.index = profiles_matrix.index.astype('string')
print('done.')
# Mask missing data
Expand Down Expand Up @@ -393,12 +393,15 @@ def main(input_files, schema_directory, output_directory, annotations,
# Based on cgMLST profiles
if len(cgMLST_genes) > 0:
dm_file = dm.main(cgMLST_matrix_outfile, output_directory,
cpu_cores, True, True)
cpu_cores, True, True)
# Import distance matrix
distance_m = pd.read_csv(dm_file[0], header=0, index_col=0,
sep='\t', low_memory=False)
sep='\t', low_memory=False)
# Convert Index values and column names to str to avoid issues with IDs that can be interpreted as int
distance_m.index = distance_m.index.astype('string')
distance_m.columns = distance_m.columns.astype('string')
dm_data = [{"rows": distance_m.values.tolist()},
{"sample_ids": distance_m.columns.tolist()}]
{"sample_ids": distance_m.columns.tolist()}]
else:
print('cgMLST is composed of 0 loci. Cannot compute distance matrix.')

Expand Down

0 comments on commit 0c2e187

Please sign in to comment.