From 60309250a131141b719996ee46b3e884dac16415 Mon Sep 17 00:00:00 2001
From: Dan Buchholz
Date: Tue, 18 Jun 2024 17:31:00 -0500
Subject: [PATCH 1/3] fix(cli): import data errors with strings, nulls

---
 packages/cli/src/commands/import-data.ts |  43 ++++--
 packages/cli/src/utils.ts                |  39 +++++-
 packages/cli/test/import-data.test.ts    | 162 +++++++++++++++++++++++
 3 files changed, 225 insertions(+), 19 deletions(-)
 create mode 100644 packages/cli/test/import-data.test.ts

diff --git a/packages/cli/src/commands/import-data.ts b/packages/cli/src/commands/import-data.ts
index 1e4be1c1..3c0d01ce 100644
--- a/packages/cli/src/commands/import-data.ts
+++ b/packages/cli/src/commands/import-data.ts
@@ -3,7 +3,7 @@ import type { Arguments } from "yargs";
 import chalk from "chalk";
 import { type Wallet } from "ethers";
 import { studioAliases } from "@tableland/studio-client";
-import { Database } from "@tableland/sdk";
+import { Database, Validator, helpers as SdkHelpers } from "@tableland/sdk";
 import { type GlobalOptions } from "../cli.js";
 import {
   ERROR_INVALID_STORE_PATH,
@@ -12,12 +12,14 @@ import {
   logger,
   normalizePrivateKey,
   parseCsvFile,
+  wrapText,
   FileStore,
 } from "../utils.js";
 
-// note: abnormal spacing is needed to ensure help message is formatted correctly
-export const command = "import-data ";
-export const desc = "write the content of a csv into an existing table";
+export const command = wrapText("import-data ");
+export const desc = wrapText(
+  "write the content of a csv into an existing table",
+);
 
 export const handler = async (
   argv: Arguments,
@@ -46,6 +48,7 @@ export const handler = async (
       (await aliases.read())[definition],
       "could not find definition in project",
     );
+    const { chainId, tableId } = await SdkHelpers.validateTableName(tableName);
 
     // need to reverse lookup tableName from definition and projectId so
    // that the wallet can be connected to the right provider
@@ -61,6 +64,13 @@ export const handler = async (
       signer,
       aliases,
     });
+    const baseUrl = SdkHelpers.getBaseUrl(31337);
+    const val = new Validator({ baseUrl });
+    // get the table schema to help map values to their type
+    const { schema } = await val.getTableById({
+      chainId,
+      tableId: tableId.toString(),
+    });
 
     const fileString = readFileSync(file).toString();
     const dataObject = await parseCsvFile(fileString);
@@ -71,7 +81,7 @@ export const handler = async (
     // need to capture row length now since `batchRows` will mutate the
     // rows Array to reduce memory overhead
     const rowCount = Number(rows.length);
-    const statements = csvHelp.batchRows(rows, headers, definition);
+    const statements = csvHelp.batchRows(rows, headers, schema, definition);
 
     const doImport = await confirmImport({
       statements,
@@ -82,21 +92,26 @@ export const handler = async (
 
     if (!doImport) return logger.log("aborting");
 
-    const results = await db.batch(statements.map((stmt) => db.prepare(stmt)));
+    const stmts = statements.map((stmt) => db.prepare(stmt));
+    const results = await db.batch(stmts);
     // the batch method returns an array of results for reads, but in this case
     // its an Array of length 1 with a single Object containing txn data
     const result = results[0];
-
-    logger.log(
-      `successfully inserted ${rowCount} row${
-        rowCount === 1 ? "" : "s"
-      } into ${definition}
+    const rec = await result.meta.txn?.wait();
+    if (rec?.errorEventIdx !== undefined) {
+      logger.error(rec);
+    } else {
+      logger.log(
+        `successfully inserted ${rowCount} row${
+          rowCount === 1 ? "" : "s"
+        } into ${definition}
 transaction receipt: ${chalk.gray.bold(
         JSON.stringify(result.meta?.txn, null, 4),
       )}
 project id: ${projectId}
 environment id: ${environmentId}`,
-    );
+      );
+    }
   } catch (err: any) {
     logger.error(err);
   }
@@ -129,10 +144,10 @@ async function confirmImport(info: {
     )} to insert ${chalk.yellow(info.rowCount)} row${
       info.rowCount === 1 ? "" : "s"
     } into table ${chalk.yellow(info.tableName)}
-This can be done with a total of ${chalk.yellow(statementCount)} statment${
+This can be done with a total of ${chalk.yellow(statementCount)} statement${
       statementCount === 1 ? "" : "s"
     }
-The total size of the statment${
+The total size of the statement${
       statementCount === 1 ? "" : "s"
     } is: ${chalk.yellow(statementLength)}
 The estimated cost is ${cost}
diff --git a/packages/cli/src/utils.ts b/packages/cli/src/utils.ts
index ab296bfc..576885f8 100644
--- a/packages/cli/src/utils.ts
+++ b/packages/cli/src/utils.ts
@@ -11,7 +11,7 @@
 } from "ethers";
 import { z } from "zod";
 import createKeccakHash from "keccak";
-import { helpers as sdkHelpers } from "@tableland/sdk";
+import { helpers as sdkHelpers, type Schema } from "@tableland/sdk";
 import { init } from "@tableland/sqlparser";
 import { type API, type ClientConfig, api } from "@tableland/studio-client";
 
@@ -158,16 +158,31 @@ export const csvHelp = {
   batchRows: function (
     rows: Array>,
     headers: string[],
+    schema: Schema,
     table: string,
   ) {
     let rowCount = 0;
     const batches = [];
 
     while (rows.length) {
       let statement = `INSERT INTO ${table}(${headers.join(",")})VALUES`;
+      // map headers to schema columns to add in the header index
+      const schemaWithIdx = headers.map((header, idx) => {
+        const colInfo = schema.columns.find((col) => {
+          // we cannot guarantee that an imported table will have backticks
+          // around the column name, so we need to check for both
+          return col.name === header || col.name === header.replace(/`/g, "");
+        });
+        return {
+          idx,
+          name: colInfo?.name,
+          type: colInfo?.type,
+        };
+      });
       while (
         rows.length &&
-        byteSize(statement + getNextValues(rows[0])) < MAX_STATEMENT_SIZE
+        byteSize(statement + getNextValues(rows[0], schemaWithIdx)) <
+          MAX_STATEMENT_SIZE
       ) {
         const row = rows.shift();
         if (!row) continue;
@@ -178,7 +193,7 @@ export const csvHelp = {
         );
       }
       // include comma between
-      statement += getNextValues(row);
+      statement += getNextValues(row, schemaWithIdx);
     }
 
     // remove last comma and add semicolon
@@ -209,8 +224,22 @@ const byteSize = function (str: string) {
   return new Blob([str]).size;
 };
 
-const getNextValues = function (row: Array) {
-  return `(${row.join(",")}),`;
+const getNextValues = function (
+  row: Array,
+  schemaWithIdx: Array>,
+) {
+  // wrap values in single quotes if the `type` is not `int` nor `integer`
+  const rowFormatted = row.map((val, idx) => {
+    const colInfo = schemaWithIdx.find((col) => {
+      return col.idx === idx;
+    });
+    const type = colInfo?.type;
+    // empty csv values must be treated as `NULL` to avoid SQL string errors
+    if (typeof val === "string" && val.length === 0) return "NULL";
+    if (type === "int" || type === "integer") return val;
+    return `'${val}'`;
+  });
+  return `(${rowFormatted.join(",")}),`;
 };
 
 function getIdFromTableName(tableName: string, revIndx: number) {
diff --git a/packages/cli/test/import-data.test.ts b/packages/cli/test/import-data.test.ts
new file mode 100644
index 00000000..8daed6bb
--- /dev/null
+++ b/packages/cli/test/import-data.test.ts
@@ -0,0 +1,162 @@
+import path from "path";
+import { fileURLToPath } from "url";
+import { equal, match } from "node:assert";
+import { getAccounts, getDatabase } from "@tableland/local";
+import { afterEach, before, describe, test } from "mocha";
+import { restore, spy } from "sinon";
+import { temporaryWrite } from "tempy";
+import mockStd from "mock-stdin";
+import yargs from "yargs/yargs";
+import * as modImportTable from "../src/commands/import-table.js";
+import type { CommandOptions as ImportTableCommandOptions } from "../src/commands/import-table.js";
+import * as mod from "../src/commands/import-data.js";
+import { type GlobalOptions } from "../src/cli.js";
+import { logger, wait } from "../src/utils.js";
+import {
+  TEST_TIMEOUT_FACTOR,
+  TEST_API_BASE_URL,
+  TEST_REGISTRY_PORT,
+  TEST_PROJECT_ID,
+} from "./utils.js";
+
+const _dirname = path.dirname(fileURLToPath(import.meta.url));
+const accounts = getAccounts(`http://127.0.0.1:${TEST_REGISTRY_PORT}`);
+const account = accounts[10];
+const db = getDatabase(account);
+
+const defaultArgs = [
+  "--store",
+  path.join(_dirname, ".studioclisession.json"),
+  "--privateKey",
+  account.privateKey.slice(2),
+  "--chain",
+  "local-tableland",
+  "--providerUrl",
+  `http://127.0.0.1:${TEST_REGISTRY_PORT}/`,
+  "--apiUrl",
+  TEST_API_BASE_URL,
+  "--projectId",
+  TEST_PROJECT_ID,
+];
+
+describe("commands/import-data", function () {
+  this.timeout(30000 * TEST_TIMEOUT_FACTOR);
+
+  let table1: string;
+  let table2: string;
+  const defName1 = "data_import_1";
+  const defName2 = "data_import_2";
+  const desc = "table description";
+
+  before(async function () {
+    const batch = await db.batch([
+      // use no backticks vs. including them to emulate a non-Studio vs. Studio
+      // created table's column names (ensure csv header/col type parsing works)
+      db.prepare(`create table ${defName1} (id int, val text);`),
+      db.prepare(`create table ${defName2} (\`id\` int, \`val\` text);`),
+    ]);
+    const res = await batch[0].meta.txn?.wait();
+    const tableNames = res?.names ?? [];
+    table1 = tableNames[0];
+    table2 = tableNames[1];
+
+    await yargs(["import", "table", table1, desc, ...defaultArgs])
+      .command(modImportTable)
+      .parse();
+    await yargs(["import", "table", table2, desc, ...defaultArgs])
+      .command(modImportTable)
+      .parse();
+
+    await wait(1000);
+  });
+
+  afterEach(function () {
+    restore();
+  });
+
+  test("can import with all values included", async function () {
+    const csvStr = `id,val\n1,test_value`;
+    const csvFile = await temporaryWrite(csvStr, { extension: "csv" });
+
+    const consoleLog = spy(logger, "log");
+    const stdin = mockStd.stdin();
+    setTimeout(() => {
+      stdin.send("y\n");
+    }, 1000);
+    await yargs(["import-data", defName1, csvFile, ...defaultArgs])
+      .command(mod)
+      .parse();
+
+    const res = consoleLog.getCall(0).firstArg;
+    const successRes = res.match(/^(.*)$/m)[1];
+
+    equal(successRes, `successfully inserted 1 row into ${defName1}`);
+  });
+
+  test("can import with empty row values", async function () {
+    const csvStr = `id,val\n1,\n,test_value\n`;
+    const csvFile = await temporaryWrite(csvStr, { extension: "csv" });
+
+    const consoleLog = spy(logger, "log");
+    const stdin = mockStd.stdin();
+    setTimeout(() => {
+      stdin.send("y\n");
+    }, 1000);
+    await yargs(["import-data", defName2, csvFile, ...defaultArgs])
+      .command(mod)
+      .parse();
+
+    const res = consoleLog.getCall(0).firstArg;
+    const successRes = res.match(/^(.*)$/m)[1];
+
+    equal(successRes, `successfully inserted 2 rows into ${defName2}`);
+  });
+
+  test("fails with wrong headers", async function () {
+    const csvStr = `not_id,not_val\n1,test_value\n`;
+    const csvFile = await temporaryWrite(csvStr, { extension: "csv" });
+
+    const consoleError = spy(logger, "error");
+    const stdin = mockStd.stdin();
+    setTimeout(() => {
+      stdin.send("y\n");
+    }, 1000);
+    await yargs(["import-data", defName2, csvFile, ...defaultArgs])
+      .command(mod)
+      .parse();
+
+    const res = consoleError.getCall(0).firstArg;
+    const regex = new RegExp(`table ${table2} has no column named not_id`);
+    match(res.toString(), regex);
+  });
+
+  test("fails with mismatched header and row length", async function () {
+    let csvStr = `id\n1,test_value\n`;
+    let csvFile = await temporaryWrite(csvStr, { extension: "csv" });
+
+    const consoleError = spy(logger, "error");
+    const stdin = mockStd.stdin();
+    setTimeout(() => {
+      stdin.send("y\n");
+    }, 1000);
+    await yargs(["import-data", defName2, csvFile, ...defaultArgs])
+      .command(mod)
+      .parse();
+
+    let res = consoleError.getCall(0).firstArg;
+    const regex = /Invalid Record Length/;
+    match(res.toString(), regex);
+
+    csvStr = `id,val\n1\n`;
+    csvFile = await temporaryWrite(csvStr, { extension: "csv" });
+    setTimeout(() => {
+      stdin.send("y\n");
+    }, 1000);
+    await yargs(["import-data", defName2, csvFile, ...defaultArgs])
+      .command(mod)
+      .parse();
+
+    res = consoleError.getCall(0).firstArg;
+    match(res.toString(), regex);
+  });
+});

From 2639b0cb13fcb26f74c4101cdcbddda89196cb4c Mon Sep 17 00:00:00 2001
From: Dan Buchholz
Date: Wed, 10 Jul 2024 13:34:33 -0400
Subject: [PATCH 2/3] fix(cli): remove hardcoded chain ID

---
 packages/cli/src/commands/import-data.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/cli/src/commands/import-data.ts b/packages/cli/src/commands/import-data.ts
index 3c0d01ce..daa19b4e 100644
--- a/packages/cli/src/commands/import-data.ts
+++ b/packages/cli/src/commands/import-data.ts
@@ -64,7 +64,7 @@ export const handler = async (
       signer,
       aliases,
     });
-    const baseUrl = SdkHelpers.getBaseUrl(31337);
+    const baseUrl = SdkHelpers.getBaseUrl(chainId);
     const val = new Validator({ baseUrl });
     // get the table schema to help map values to their type
     const { schema } = await val.getTableById({

From 58661c7c4751407c433c32f53ca16838667942af Mon Sep 17 00:00:00 2001
From: Dan Buchholz
Date: Fri, 19 Jul 2024 12:26:28 -0400
Subject: [PATCH 3/3] refactor: wip parsing fixes

---
 packages/cli/src/utils.ts             |  8 ++++++-
 packages/cli/test/import-data.test.ts | 31 +++++++++++++++++++++++++--
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/packages/cli/src/utils.ts b/packages/cli/src/utils.ts
index 576885f8..aecea424 100644
--- a/packages/cli/src/utils.ts
+++ b/packages/cli/src/utils.ts
@@ -630,7 +630,13 @@ export const logger = {
 
 export const parseCsvFile = async function (file: string): Promise {
   return await new Promise(function (resolve, reject) {
-    const parser = parse();
+    // custom parsing
+    const parserOptions = {
+      skipEmptyLines: true,
+      trim: true,
+    };
+
+    const parser = parse(parserOptions);
     const rows: any[] = [];
 
     parser.on("readable", function () {
diff --git a/packages/cli/test/import-data.test.ts b/packages/cli/test/import-data.test.ts
index 8daed6bb..32eb26a0 100644
--- a/packages/cli/test/import-data.test.ts
+++ b/packages/cli/test/import-data.test.ts
@@ -39,7 +39,7 @@ const defaultArgs = [
   TEST_PROJECT_ID,
 ];
 
-describe("commands/import-data", function () {
+describe.only("commands/import-data", function () {
   this.timeout(30000 * TEST_TIMEOUT_FACTOR);
 
   let table1: string;
@@ -74,7 +74,7 @@ describe("commands/import-data", function () {
     restore();
   });
 
-  test("can import with all values included", async function () {
+  test("can import a single row", async function () {
     const csvStr = `id,val\n1,test_value`;
     const csvFile = await temporaryWrite(csvStr, { extension: "csv" });
 
@@ -93,6 +93,33 @@ describe("commands/import-data", function () {
     equal(successRes, `successfully inserted 1 row into ${defName1}`);
   });
 
+  test.only("can import with quotes, escape chars, and multi line", async function () {
+    /* eslint-disable no-useless-escape */
+    const csvStr = `id,val
+1,"I'll work"
+1,And I'll work
+4,This ends with a backslash \\
+7,"Escapes \'single quotes\'"
+8,This "quote" works
+`;
+    console.log(csvStr);
+    const csvFile = await temporaryWrite(csvStr, { extension: "csv" });
+
+    const consoleLog = spy(logger, "error");
+    const stdin = mockStd.stdin();
+    setTimeout(() => {
+      stdin.send("y\n");
+    }, 1000);
+    await yargs(["import-data", defName1, csvFile, ...defaultArgs])
+      .command(mod)
+      .parse();
+
+    const res = consoleLog.getCall(0).firstArg;
+    const successRes = res.match(/^(.*)$/m)[1];
+
+    equal(successRes, `successfully inserted 7 rows into ${defName1}`);
+  });
+
   test("can import with empty row values", async function () {
     const csvStr = `id,val\n1,\n,test_value\n`;
     const csvFile = await temporaryWrite(csvStr, { extension: "csv" });