Skip to content

Commit

Permalink
Handle UTF BOM
Browse files Browse the repository at this point in the history
  • Loading branch information
vslinko committed Aug 12, 2023
1 parent 6d994b1 commit d6166be
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 16 deletions.
8 changes: 8 additions & 0 deletions reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ interface HiddenCSVReaderOptions extends CSVReaderOptions {
/**
 * Do-nothing function: accepts an optional argument of any type, ignores it,
 * and evaluates to `undefined`.
 */
// deno-lint-ignore no-explicit-any
function noop(_?: any): any {
  return undefined;
}

// Bytes EF BB BF: the UTF-8 encoding of the byte order mark (U+FEFF).
const utfBom = Uint8Array.of(0xef, 0xbb, 0xbf);

const defaultCSVReaderOptions: HiddenCSVReaderOptions = {
columnSeparator: ",",
lineSeparator: "\n",
Expand Down Expand Up @@ -327,6 +329,12 @@ export class CSVReader {
return;
}

// skip UTF BOM
if (!this.inColumn && this.currentPos === 0 && this.hasNext(utfBom)) {
this.skip(utfBom.length);
continue;
}

if (!this.inColumn && this.inputBufferUnprocessed === 0) {
this.debug("eof");
if (!this.emptyLine) {
Expand Down
46 changes: 30 additions & 16 deletions reader_test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { concat } from "./deps.ts";
import { assertEquals, assertRejects } from "./dev_deps.ts";
import {
readCSV,
Expand All @@ -11,8 +12,12 @@ class MyReader implements Deno.Reader {
private buf: Uint8Array;
private index: number;

constructor(content: string) {
constructor(content: string, options: { withBom?: boolean } = {}) {
const opts = { withBom: false, ...options };
this.buf = new TextEncoder().encode(content);
if (opts.withBom) {
this.buf = concat(new Uint8Array([0xef, 0xbb, 0xbf]), this.buf);
}
this.index = 0;
}

Expand Down Expand Up @@ -63,6 +68,17 @@ a,b,c`);
},
});

Deno.test({
  name: "readCSV parses file with UTF BOM",
  async fn() {
    // The `withBom` option makes MyReader prepend the bytes EF BB BF to the encoded content.
    const bomPrefixedInput = new MyReader(`"1","2"`, { withBom: true });
    const expectedRows = [["1", "2"]];

    // Collect every row that readCSV yields for the BOM-prefixed input.
    const parsedRows = await asyncArrayFrom2(readCSV(bomPrefixedInput));

    // The BOM must not appear in the first cell: only the two quoted values remain.
    assertEquals(parsedRows, expectedRows);
  },
});

Deno.test({
name: "readCSV skips empty lines",
async fn() {
Expand Down Expand Up @@ -210,17 +226,14 @@ Deno.test({
);

const rows = await asyncArrayFrom2(
readCSV(
reader,
{
_readerIteratorBufferSize: 1,
_columnBufferMinStepSize: 1,
_inputBufferIndexLimit: 1,
_columnBufferReserve: 1,
_stats: stats,
// deno-lint-ignore no-explicit-any
} as any,
),
readCSV(reader, {
_readerIteratorBufferSize: 1,
_columnBufferMinStepSize: 1,
_inputBufferIndexLimit: 1,
_columnBufferReserve: 1,
_stats: stats,
// deno-lint-ignore no-explicit-any
} as any),
);

assertEquals(rows, [
Expand Down Expand Up @@ -364,7 +377,10 @@ g,h`,
readCSVRows(reader, { fromLine: 1, toLine: 3 }),
);

assertEquals(rows, [["c", "d"], ["e", "f"]]);
assertEquals(rows, [
["c", "d"],
["e", "f"],
]);
},
});

Expand Down Expand Up @@ -402,8 +418,6 @@ a,b,c
}),
);

assertEquals(rows, [
["1", "2", "3"],
]);
assertEquals(rows, [["1", "2", "3"]]);
},
});

0 comments on commit d6166be

Please sign in to comment.