Skip to content

Commit

Permalink
FMSI can export MSes.
Browse files Browse the repository at this point in the history
  • Loading branch information
OndrejSladky committed Feb 27, 2024
1 parent 73189fa commit 4a38629
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 9 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ The recognized commands are:
- `query` Queries a $k$-mer against an index.
- `merge` Merge several indexes.
- `normalize` Normalize an index.
- `export` Export the underlying masked superstring.
- `clean` Cleans the files stored for index.
- `-v` Prints the version of the program.

Expand Down Expand Up @@ -98,6 +99,13 @@ It recognizes the following arguments:

For example: `./fmsi normalize -p spneumoniae.fa -k 13 -f xor -s`

### Export

Export (`./fmsi export`) recognizes the following arguments:

- `-p path_to_fasta` - The path to the FASTA file from which the index was created. This is a required argument.


### Clean

Clean (`./fmsi clean`) recognizes the following arguments:
Expand Down
61 changes: 54 additions & 7 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <unistd.h>

static int usage() {
std::cerr << "MSI is a tool for efficient indexing of Masked Superstrings."
std::cerr << "FMSI is a tool for efficient indexing of Masked Superstrings."
<< std::endl;
std::cerr << std::endl << "The recognized commands are:" << std::endl;
std::cerr << " `index` - Creates a BWT based index of the given masked "
Expand All @@ -27,14 +27,15 @@ static int usage() {
std::cerr << " `clean` - Cleans the files stored for index." << std::endl;
std::cerr << " `merge` - Merges several indices." << std::endl;
std::cerr << " `normalize` - Normalizes the given index." << std::endl;
std::cerr << " `export` - Export the underlying masked superstring." << std::endl;
std::cerr << " `-v` - Prints the version of the program." << std::endl;
std::cerr << " `-h` - Prints this help." << std::endl;
return 1;
}

static int usage_index() {
std::cerr
<< "MS-Index Index creates the index for a given masked superstring."
<< "FMSI Index creates the index for a given masked superstring."
<< std::endl;
std::cerr << std::endl << "The recognized arguments are:" << std::endl;
std::cerr << " `-p path_to_fasta` - The path to the fasta file with masked "
Expand All @@ -54,7 +55,7 @@ static int usage_index() {
}

static int usage_merge() {
std::cerr << "MS-Index Merge merges several indices." << std::endl;
std::cerr << "FMSI Merge merges several indices." << std::endl;
std::cerr << std::endl << "The recognized arguments are:" << std::endl;
std::cerr << " `-p path_to_fasta` - The path to the fasta file which "
"should be merged. Can be provided multiple times. It is "
Expand All @@ -70,7 +71,7 @@ static int usage_merge() {
}

static int usage_query() {
std::cerr << "MS-Index Query return whether the provided $k$-mer is in the "
std::cerr << "FMSI Query return whether the provided $k$-mer is in the "
"masked superstring or not."
<< std::endl;
std::cerr
Expand Down Expand Up @@ -110,7 +111,7 @@ static int usage_query() {
}

static int usage_normalize() {
std::cerr << "MS-Index Normalize normalizes the given FM-index so that it "
std::cerr << "FMSI Normalize normalizes the given FM-index so that it "
"does not occupy more space than needed."
<< std::endl;
std::cerr << std::endl << "The recognized arguments are:" << std::endl;
Expand Down Expand Up @@ -148,9 +149,20 @@ static int usage_normalize() {
return 1;
}

// Writes the help text of the `export` subcommand to standard error.
//
// Returns 1 unconditionally, so callers can `return usage_export();` to
// report an error exit status together with the help text.
static int usage_export() {
  std::ostream &err = std::cerr;

  // One-line summary of the subcommand, followed by a blank separator line.
  err << "FMSI Export exports the indexed masked superstring to its string "
         "form."
      << std::endl;
  err << std::endl;

  // One entry per recognized command-line option.
  err << "The recognized arguments are:" << std::endl;
  err << " `-p path_to_fasta` - The path to the fasta file from which the "
         "index was created. Required."
      << std::endl;
  err << " `-h` - Prints this help and terminates." << std::endl;

  return 1;
}

static int usage_clean() {
std::cerr
<< "MS-Index Index creates the index for a given masked superstring."
<< "FMSI Index creates the index for a given masked superstring."
<< std::endl;
std::cerr << std::endl << "The recognized arguments are:" << std::endl;
std::cerr << " `-p path_to_fasta` - The path to the fasta file with masked "
Expand Down Expand Up @@ -415,7 +427,7 @@ int ms_merge(int argc, char *argv[]) {
}

void print_masked_superstring(masked_superstring_t ms) {
std::cout << "> normalized masked superstring" << std::endl;
std::cout << "> masked superstring" << std::endl;
for (size_t i = 0; i < ms.superstring.size(); ++i) {
if (ms.mask[i])
std::cout << ms.superstring[i];
Expand Down Expand Up @@ -517,6 +529,39 @@ int ms_normalize(int argc, char *argv[]) {
return 0;
}

// Entry point of the `export` subcommand: loads the index that belongs to
// the given fasta file and prints the masked superstring it represents.
//
// Recognized options (parsed with getopt):
//   -p path  Path to the fasta file from which the index was created.
//            Required.
//   -h       Print the export help and exit successfully.
//
// argc/argv are the subcommand's own argument vector, i.e. argv[0] is the
// subcommand name (main passes `argc - 1, argv + 1`).
//
// Returns 0 on success or when help was requested with -h. On an unknown
// option, a missing -p, or an index that cannot be loaded, prints the export
// help and returns the value of usage_export().
int ms_export(int argc, char *argv[]) {
  bool usage = false;
  int c;
  std::string fn;
  while ((c = getopt(argc, argv, "p:h")) >= 0) {
    switch (c) {
    case 'h':
      usage = true;
      break;
    case 'p':
      fn = optarg;
      break;
    default:
      return usage_export();
    }
  }
  if (usage) {
    // Help requested explicitly: print it, but report success.
    usage_export();
    return 0;
  }
  // `-p` is required; fail early instead of trying to load an index from an
  // empty path.
  if (fn.empty())
    return usage_export();

  fm_index_t fm_index;
  bw_mask_t mask;
  // TODO: generalize this for multi-k masks.
  // NOTE(review): the literal 0 presumably selects the default mask / k;
  // confirm against load_index_pair.
  if (!load_index_pair(fn, 0, fm_index, mask))
    return usage_export(); // Was usage_normalize(): wrong subcommand's help.

  // NOTE(review): `size() - 2` presumably addresses the last text character
  // before the index's terminating sentinel, and would wrap around for an
  // index of size < 2 -- confirm that such an index cannot be loaded.
  auto superstring = sdsl::extract(fm_index, 0, fm_index.size() - 2);
  auto original_mask = construct_inverse_mask(superstring, mask);
  print_masked_superstring({original_mask, superstring});
  return 0;
}

int ms_clean(int argc, char *argv[]) {
bool usage = false;
int c;
Expand Down Expand Up @@ -567,6 +612,8 @@ int main(int argc, char *argv[]) {
ret = ms_merge(argc - 1, argv + 1);
else if (strcmp(argv[1], "normalize") == 0)
ret = ms_normalize(argc - 1, argv + 1);
else if (strcmp(argv[1], "export") == 0)
ret = ms_export(argc - 1, argv + 1);
else if (strcmp(argv[1], "-v") == 0)
return version();
else if (strcmp(argv[1], "-h") == 0) {
Expand Down
2 changes: 1 addition & 1 deletion tests/testfiles/result_normalized.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
> normalized masked superstring
> masked superstring
TAcAcg
2 changes: 1 addition & 1 deletion tests/testfiles/result_normalized2.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
> normalized masked superstring
> masked superstring
CgTGta

0 comments on commit 4a38629

Please sign in to comment.