Skip to content

Commit

Permalink
main : fix double quote escaping in csv output (ggerganov#2090)
Browse files Browse the repository at this point in the history
  • Loading branch information
mashizora authored and iThalay committed Sep 23, 2024
1 parent 6555158 commit 638585e
Showing 1 changed file with 33 additions and 1 deletion.
34 changes: 33 additions & 1 deletion examples/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,38 @@ char *escape_double_quotes_and_backslashes(const char *str) {
return escaped;
}

// Escapes a string for use inside a double-quoted CSV field: every double
// quote is doubled, as described by RFC 4180.
//
// Returns a newly allocated, NUL-terminated copy that the caller must release
// with free(), or NULL if `str` is NULL or the allocation fails.
char *escape_double_quotes_in_csv(const char *str) {
    if (!str) {
        return NULL;
    }

    // One byte per input character, one extra per quote, plus the terminator.
    size_t needed = strlen(str) + 1;
    for (const char *p = str; *p != '\0'; ++p) {
        needed += (*p == '"') ? 1 : 0;
    }

    // calloc() zero-fills the buffer, so the terminator is already in place.
    char *result = (char *)calloc(needed, 1);
    if (!result) {
        return NULL;
    }

    char *out = result;
    for (const char *in = str; *in != '\0'; ++in) {
        if (*in == '"') {
            *out++ = '"';
        }
        *out++ = *in;
    }

    return result;
}

bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
std::ofstream fout(fname);
if (!fout.is_open()) {
Expand All @@ -492,7 +524,7 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
char * text_escaped = escape_double_quotes_and_backslashes(text);
char * text_escaped = escape_double_quotes_in_csv(text);

//need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
fout << 10 * t0 << "," << 10 * t1 << ",";
Expand Down

0 comments on commit 638585e

Please sign in to comment.