This repository was archived by the owner on Feb 27, 2023. It is now read-only.
forked from hallamlab/FragGeneScanPlus
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path: fasta.c
87 lines (73 loc) · 2.35 KB
/
fasta.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/* Simple API for FASTA file reading
* for Bio5495/BME537 Computational Molecular Biology
* SRE, Sun Sep 8 05:35:11 2002 [AA2721, transatlantic]
* CVS $Id: fasta.c,v 1.1 2003/10/05 18:43:39 eddy Exp $
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fasta.h"
/* Open a FASTA source and prime the one-line lookahead buffer.
 *
 * seqfile: path of the file to read, or the literal string "stdin" to
 *          read from standard input.
 *
 * Returns a newly allocated FastaFile whose buffer already holds the
 * first line of input, or NULL if seqfile is NULL, memory cannot be
 * allocated, the file cannot be opened, or the input is empty.
 * On every failure path all resources acquired here are released.
 * Release a successful result with fasta_file_free().
 */
FastaFile *fasta_file_new(const char *seqfile) {
  FastaFile *ffp;

  if (seqfile == NULL)
    return NULL;

  ffp = malloc(sizeof *ffp);
  if (ffp == NULL)
    return NULL;

  if (strcmp(seqfile, "stdin") == 0) {
    ffp->fp = stdin;
  } else {
    ffp->fp = fopen(seqfile, "r");
    if (ffp->fp == NULL) {
      free(ffp);
      return NULL;
    }
  }

  /* Load the first line so the record reader can peek at it. */
  if (fgets(ffp->buffer, STRINGLEN, ffp->fp) == NULL) {
    /* Empty input or read error: close what we opened, but never
     * close stdin, which this module only borrows. */
    if (ffp->fp != stdin)
      fclose(ffp->fp);
    free(ffp);
    return NULL;
  }

  return ffp;
}
/* Read the next FASTA record from ffp.
 *
 * ffp:         handle whose lookahead buffer holds the next unread line.
 * out_header:  receives a malloc'ed copy of the description line, without
 *              the leading '>' and without the trailing newline.
 * out_seq:     receives a malloc'ed, NUL-terminated string holding every
 *              alphabetic character found between this header and the
 *              next one (or end of input).
 * out_seq_len: receives the number of characters stored in *out_seq.
 *
 * Returns true when a record was read; the caller then owns and must
 * free() *out_header and *out_seq.  Returns false, leaving the output
 * parameters untouched, when the lookahead line is not a header, the
 * header is empty, the input is exhausted, or memory allocation fails.
 */
bool fasta_file_read_record(FastaFile *ffp, char **out_seq, char **out_header, int *out_seq_len) {
  enum { SEQ_CHUNK = 128 };   /* growth increment for the sequence buffer */
  char *header, *seq, *grown;
  const char *s;
  size_t hlen, chunk;
  int n, nalloc;
  bool line_done, have_next;

  /* Peek at the lookahead buffer; it must hold a FASTA header. */
  if (ffp->buffer[0] != '>')
    return false;

  /* Copy the header text that follows '>' up to the newline. */
  hlen = strcspn(ffp->buffer + 1, "\n");
  if (hlen == 0)
    return false;
  line_done = (ffp->buffer[1 + hlen] == '\n');

  header = malloc(hlen + 1);
  if (header == NULL)
    return false;
  memcpy(header, ffp->buffer + 1, hlen);
  header[hlen] = '\0';

  /* A header longer than the lookahead buffer arrives in several fgets
   * chunks.  Keep appending until the newline shows up so the tail of
   * the header is not mistaken for sequence data. */
  while (!line_done && fgets(ffp->buffer, STRINGLEN, ffp->fp) != NULL) {
    chunk = strcspn(ffp->buffer, "\n");
    line_done = (ffp->buffer[chunk] == '\n');
    grown = realloc(header, hlen + chunk + 1);
    if (grown == NULL) {
      free(header);
      ffp->buffer[0] = '\0';
      return false;
    }
    header = grown;
    memcpy(header + hlen, ffp->buffer, chunk);
    hlen += chunk;
    header[hlen] = '\0';
  }

  /* Everything up to the next header line is sequence.  The buffer grows
   * in SEQ_CHUNK steps and always keeps one spare byte for the final NUL. */
  nalloc = SEQ_CHUNK;
  seq = malloc((size_t)nalloc);
  if (seq == NULL) {
    free(header);
    ffp->buffer[0] = '\0';
    return false;
  }
  n = 0;
  have_next = false;

  while (fgets(ffp->buffer, STRINGLEN, ffp->fp) != NULL) {
    if (ffp->buffer[0] == '>') {
      have_next = true;        /* lookahead now holds the next header */
      break;
    }
    for (s = ffp->buffer; *s != '\0'; s++) {
      /* <ctype.h> functions require an unsigned char value. */
      if (!isalpha((unsigned char)*s))
        continue;
      seq[n++] = *s;
      if (n == nalloc) {
        grown = realloc(seq, (size_t)nalloc + SEQ_CHUNK);
        if (grown == NULL) {
          free(seq);
          free(header);
          ffp->buffer[0] = '\0';
          return false;
        }
        seq = grown;
        nalloc += SEQ_CHUNK;
      }
    }
  }

  /* At end of input fgets leaves the buffer as it was.  Clear it so a
   * stale '>' line cannot be returned again by the next call. */
  if (!have_next)
    ffp->buffer[0] = '\0';

  seq[n] = '\0';
  *out_header = header;
  *out_seq = seq;
  *out_seq_len = n;
  return true;
}
/* Release a handle obtained from fasta_file_new().
 *
 * Closes the underlying stream and frees the handle.  Standard input is
 * left open because fasta_file_new() only borrows it.  Passing NULL is
 * a no-op.
 */
void fasta_file_free(FastaFile *ffp) {
  if (ffp == NULL)
    return;

  if (ffp->fp != NULL && ffp->fp != stdin)
    fclose(ffp->fp);
  free(ffp);
}