fix(cli): import data errors with strings, nulls #290
base: main
@@ -3,7 +3,7 @@ import type { Arguments } from "yargs";
 import chalk from "chalk";
 import { type Wallet } from "ethers";
 import { studioAliases } from "@tableland/studio-client";
-import { Database } from "@tableland/sdk";
+import { Database, Validator, helpers as SdkHelpers } from "@tableland/sdk";
 import { type GlobalOptions } from "../cli.js";
 import {
   ERROR_INVALID_STORE_PATH,

@@ -12,12 +12,14 @@ import {
   logger,
   normalizePrivateKey,
   parseCsvFile,
+  wrapText,
   FileStore,
 } from "../utils.js";

-// note: abnormal spacing is needed to ensure help message is formatted correctly
-export const command = "import-data <definition> <file>";
-export const desc = "write the content of a csv into an existing table";
+export const command = wrapText("import-data <definition> <file>");
+export const desc = wrapText(
+  "write the content of a csv into an existing table",
+);

 export const handler = async (
   argv: Arguments<GlobalOptions>,

@@ -46,6 +48,7 @@ export const handler = async (
     (await aliases.read())[definition],
     "could not find definition in project",
   );
+  const { chainId, tableId } = await SdkHelpers.validateTableName(tableName);

   // need to reverse lookup tableName from definition and projectId so
   // that the wallet can be connected to the right provider

@@ -61,6 +64,13 @@ export const handler = async (
     signer,
     aliases,
   });
+  const baseUrl = SdkHelpers.getBaseUrl(chainId);
+  const val = new Validator({ baseUrl });
+  // get the table schema to help map values to their type
+  const { schema } = await val.getTableById({
+    chainId,
+    tableId: tableId.toString(),
+  });

   const fileString = readFileSync(file).toString();
   const dataObject = await parseCsvFile(fileString);

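For context, a minimal standalone sketch (not part of the diff) of the schema lookup these added lines perform; the table name is a made-up example, and the commented response shape is an assumption based on how `batchRows` consumes it below:

import { Validator, helpers } from "@tableland/sdk";

// hypothetical universal table name, for illustration only
const tableName = "my_table_31337_2";
const { chainId, tableId } = await helpers.validateTableName(tableName);

// the Validator reads table metadata from a validator node's REST API
const validator = new Validator({ baseUrl: helpers.getBaseUrl(chainId) });
const { schema } = await validator.getTableById({
  chainId,
  tableId: tableId.toString(),
});

// `schema.columns` is roughly [{ name: "id", type: "int" }, { name: "val", type: "text" }],
// which is what the quoting logic in `batchRows` keys off of
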
@@ -71,7 +81,7 @@ export const handler = async (
   // need to capture row length now since `batchRows` will mutate the
   // rows Array to reduce memory overhead
   const rowCount = Number(rows.length);
-  const statements = csvHelp.batchRows(rows, headers, definition);
+  const statements = csvHelp.batchRows(rows, headers, schema, definition);

   const doImport = await confirmImport({
     statements,

@@ -82,21 +92,26 @@ export const handler = async (

     if (!doImport) return logger.log("aborting");

-    const results = await db.batch(statements.map((stmt) => db.prepare(stmt)));
+    const stmts = statements.map((stmt) => db.prepare(stmt));
+    const results = await db.batch(stmts);
     // the batch method returns an array of results for reads, but in this case
     // its an Array of length 1 with a single Object containing txn data
     const result = results[0];

-    logger.log(
-      `successfully inserted ${rowCount} row${
-        rowCount === 1 ? "" : "s"
-      } into ${definition}
+    const rec = await result.meta.txn?.wait();
+    if (rec?.errorEventIdx !== undefined) {
+      logger.error(rec);
+    } else {
+      logger.log(
+        `successfully inserted ${rowCount} row${
+          rowCount === 1 ? "" : "s"
+        } into ${definition}
 transaction receipt: ${chalk.gray.bold(
   JSON.stringify(result.meta?.txn, null, 4),
 )}
 project id: ${projectId}
-environment id: ${environmentId}`,
-    );
+environment id: ${environmentId}`,
+      );
+    }
   } catch (err: any) {
     logger.error(err);
   }

Review comment (on the `errorEventIdx` check): before, we were logging that the tx was successful. in reality, data was being inserted like …

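For clarity on the check above: per this diff, a transaction can land on-chain while the SQL event itself fails validation, and that failure only surfaces on the receipt. A rough illustration (field values invented; the shape is assumed from the `rec?.errorEventIdx` usage above):

// illustrative receipt for a failed insert; values are made up
const rec = {
  transactionHash: "0xabc…",
  error: "table my_table_31337_2 has no column named not_id",
  errorEventIdx: 0,
};
// a defined `errorEventIdx` means the event at that index failed, so the
// CLI now logs the receipt as an error instead of claiming success
if (rec.errorEventIdx !== undefined) {
  console.error(rec);
}
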
@@ -129,10 +144,10 @@ async function confirmImport(info: {
   )} to insert ${chalk.yellow(info.rowCount)} row${
     info.rowCount === 1 ? "" : "s"
   } into table ${chalk.yellow(info.tableName)}
-This can be done with a total of ${chalk.yellow(statementCount)} statment${
+This can be done with a total of ${chalk.yellow(statementCount)} statement${
   statementCount === 1 ? "" : "s"
 }
-The total size of the statment${
+The total size of the statement${
   statementCount === 1 ? "" : "s"
 } is: ${chalk.yellow(statementLength)}
 The estimated cost is ${cost}

@@ -11,7 +11,7 @@ import {
 } from "ethers";
 import { z } from "zod";
 import createKeccakHash from "keccak";
-import { helpers as sdkHelpers } from "@tableland/sdk";
+import { helpers as sdkHelpers, type Schema } from "@tableland/sdk";
 import { init } from "@tableland/sqlparser";
 import { type API, type ClientConfig, api } from "@tableland/studio-client";

@@ -158,16 +158,31 @@ export const csvHelp = {
   batchRows: function (
     rows: Array<Array<string | number>>,
     headers: string[],
+    schema: Schema,
     table: string,
   ) {
     let rowCount = 0;
     const batches = [];
     while (rows.length) {
       let statement = `INSERT INTO ${table}(${headers.join(",")})VALUES`;
+      // map headers to schema columns to add in the header index
+      const schemaWithIdx = headers.map((header, idx) => {
+        const colInfo = schema.columns.find((col) => {
+          // we cannot guarantee that an imported table will have backticks
+          // around the column name, so we need to check for both
+          return col.name === header || col.name === header.replace(/`/g, "");
+        });
+        return {
+          idx,
+          name: colInfo?.name,
+          type: colInfo?.type,
+        };
+      });
+
       while (
         rows.length &&
-        byteSize(statement + getNextValues(rows[0])) < MAX_STATEMENT_SIZE
+        byteSize(statement + getNextValues(rows[0], schemaWithIdx)) <
+          MAX_STATEMENT_SIZE
       ) {
         const row = rows.shift();
         if (!row) continue;

Review comment (on `schemaWithIdx`): basically, take the schema columns and add the header/col index wrt the CSV file to help with logic below

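To make the header-to-schema mapping concrete, a small hypothetical example (column names invented) of what `schemaWithIdx` ends up holding for a two-column CSV:

// hypothetical inputs: Studio-created tables may backtick column names
const headers = ["id", "`val`"];
const schema = {
  columns: [
    { name: "id", type: "int" },
    { name: "val", type: "text" },
  ],
};
// after the mapping above, schemaWithIdx is:
// [
//   { idx: 0, name: "id", type: "int" },
//   { idx: 1, name: "val", type: "text" },
// ]
// i.e. each CSV column position now carries its SQL type, backticks or not
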
@@ -178,7 +193,7 @@ export const csvHelp = {
       );
     }
     // include comma between
-    statement += getNextValues(row);
+    statement += getNextValues(row, schemaWithIdx);
   }

   // remove last comma and add semicolon

@@ -209,8 +224,22 @@ const byteSize = function (str: string) {
   return new Blob([str]).size;
 };

-const getNextValues = function (row: Array<string | number>) {
-  return `(${row.join(",")}),`;
+const getNextValues = function (
+  row: Array<string | number>,
+  schemaWithIdx: Array<Record<string, any>>,
+) {
+  // wrap values in single quotes if the `type` is not `int` nor `integer`
+  const rowFormatted = row.map((val, idx) => {
+    const colInfo = schemaWithIdx.find((col) => {
+      return col.idx === idx;
+    });
+    const type = colInfo?.type;
+    // empty csv values must be treated as `NULL` to avoid SQL string errors
+    if (typeof val === "string" && val.length === 0) return "NULL";
+    if (type === "int" || type === "integer") return val;
+    return `'${val}'`;
+  });
+  return `(${rowFormatted.join(",")}),`;
 };

 function getIdFromTableName(tableName: string, revIndx: number) {

Review comment: there are probs more performant ways to achieve this setup, but this will get the type for the row value based on the header index. if the CSV row value gets parsed as an empty string, assume a NULL value. else, use the SQL type to dictate single quote wrapping.

Review comment: thinking about this a bit more... the downside here is that it doesn't sanitize the data. so, if you have a CSV file with single quotes in the actual value, that could pose a syntax issue. i did consider refactoring this to use the …

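One conventional fix for the sanitization gap flagged above (a sketch, not what this PR implements): SQLite escapes an embedded single quote by doubling it, so text values could be escaped before wrapping. The truncated comment may also have been heading toward the SDK's parameter binding (`db.prepare(stmt).bind(...)`), which sidesteps manual escaping entirely.

// sketch only: standard SQLite string-literal escaping by doubling quotes
const escapeSqlString = (val: string): string =>
  `'${val.replace(/'/g, "''")}'`;

escapeSqlString("I'll work"); // => 'I''ll work'
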
@@ -601,7 +630,13 @@ export const logger = {

 export const parseCsvFile = async function (file: string): Promise<string[][]> {
   return await new Promise(function (resolve, reject) {
-    const parser = parse();
+    // custom parsing
+    const parserOptions = {
+      skipEmptyLines: true,
+      trim: true,
+    };
+
+    const parser = parse(parserOptions);
     const rows: any[] = [];

     parser.on("readable", function () {

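A standalone sketch (not from the diff) of what these two options do, using csv-parse's sync API for brevity; csv-parse accepts both the camelCase spellings used here and their snake_case equivalents:

import { parse } from "csv-parse/sync";

const csv = "id,val\n\n 1 , test_value \n";
// skipEmptyLines drops the blank line; trim strips padding around cells
const rows = parse(csv, { skipEmptyLines: true, trim: true });
console.log(rows); // [ [ "id", "val" ], [ "1", "test_value" ] ]
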
@@ -0,0 +1,189 @@
import path from "path";
import { fileURLToPath } from "url";
import { equal, match } from "node:assert";
import { getAccounts, getDatabase } from "@tableland/local";
import { afterEach, before, describe, test } from "mocha";
import { restore, spy } from "sinon";
import { temporaryWrite } from "tempy";
import mockStd from "mock-stdin";
import yargs from "yargs/yargs";
import * as modImportTable from "../src/commands/import-table.js";
import type { CommandOptions as ImportTableCommandOptions } from "../src/commands/import-table.js";
import * as mod from "../src/commands/import-data.js";
import { type GlobalOptions } from "../src/cli.js";
import { logger, wait } from "../src/utils.js";
import {
  TEST_TIMEOUT_FACTOR,
  TEST_API_BASE_URL,
  TEST_REGISTRY_PORT,
  TEST_PROJECT_ID,
} from "./utils.js";

const _dirname = path.dirname(fileURLToPath(import.meta.url));
const accounts = getAccounts(`http://127.0.0.1:${TEST_REGISTRY_PORT}`);
const account = accounts[10];
const db = getDatabase(account);

const defaultArgs = [
  "--store",
  path.join(_dirname, ".studioclisession.json"),
  "--privateKey",
  account.privateKey.slice(2),
  "--chain",
  "local-tableland",
  "--providerUrl",
  `http://127.0.0.1:${TEST_REGISTRY_PORT}/`,
  "--apiUrl",
  TEST_API_BASE_URL,
  "--projectId",
  TEST_PROJECT_ID,
];

describe.only("commands/import-data", function () {
  this.timeout(30000 * TEST_TIMEOUT_FACTOR);

  let table1: string;
  let table2: string;
  const defName1 = "data_import_1";
  const defName2 = "data_import_2";
  const desc = "table description";

  before(async function () {
    const batch = await db.batch([
      // use no backticks vs. including them to emulate a non-Studio vs. Studio
      // created table's column names (ensure csv header/col type parsing works)
      db.prepare(`create table ${defName1} (id int, val text);`),
      db.prepare(`create table ${defName2} (\`id\` int, \`val\` text);`),
    ]);
    const res = await batch[0].meta.txn?.wait();
    const tableNames = res?.names ?? [];
    table1 = tableNames[0];
    table2 = tableNames[1];

    await yargs(["import", "table", table1, desc, ...defaultArgs])
      .command<ImportTableCommandOptions>(modImportTable)
      .parse();
    await yargs(["import", "table", table2, desc, ...defaultArgs])
      .command<ImportTableCommandOptions>(modImportTable)
      .parse();

    await wait(1000);
  });

Review comment: i noticed a comment about the Studio table columns having backticks around them, and the user might not know this: here. e.g., a col name is … i can't remember, is this still the approach? it didn't look to have an impact here, though, because i used a table that was created outside of the Studio and without the backticked col names.

  afterEach(function () {
    restore();
  });

  test("can import a single row", async function () {
    const csvStr = `id,val\n1,test_value`;
    const csvFile = await temporaryWrite(csvStr, { extension: "csv" });

    const consoleLog = spy(logger, "log");
    const stdin = mockStd.stdin();
    setTimeout(() => {
      stdin.send("y\n");
    }, 1000);
    await yargs(["import-data", defName1, csvFile, ...defaultArgs])
      .command<GlobalOptions>(mod)
      .parse();

    const res = consoleLog.getCall(0).firstArg;
    const successRes = res.match(/^(.*)$/m)[1];

    equal(successRes, `successfully inserted 1 row into ${defName1}`);
  });

test.only("can import with quotes, escape chars, and multi line", async function () { | ||
/* eslint-disable no-useless-escape */ | ||
const csvStr = `id,val | ||
1,"I'll work" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @joewagner up to you! i've trying to mess with the csv parser to get things to work with these different test cases. lmk if this is what you were thinking. (I haven't been able to get the |
||
1,And I'll work | ||
4,This ends with a backslash \\ | ||
7,"Escapes \'single quotes\'" | ||
8,This "quote" works | ||
`; | ||
console.log(csvStr); | ||
const csvFile = await temporaryWrite(csvStr, { extension: "csv" }); | ||
|
||
const consoleLog = spy(logger, "error"); | ||
const stdin = mockStd.stdin(); | ||
setTimeout(() => { | ||
stdin.send("y\n"); | ||
}, 1000); | ||
await yargs(["import-data", defName1, csvFile, ...defaultArgs]) | ||
.command<GlobalOptions>(mod) | ||
.parse(); | ||
|
||
const res = consoleLog.getCall(0).firstArg; | ||
const successRes = res.match(/^(.*)$/m)[1]; | ||
|
||
equal(successRes, `successfully inserted 7 rows into ${defName1}`); | ||
}); | ||
|
||
test("can import with empty row values", async function () { | ||
const csvStr = `id,val\n1,\n,test_value\n`; | ||
const csvFile = await temporaryWrite(csvStr, { extension: "csv" }); | ||
|
||
const consoleLog = spy(logger, "log"); | ||
const stdin = mockStd.stdin(); | ||
setTimeout(() => { | ||
stdin.send("y\n"); | ||
}, 1000); | ||
await yargs(["import-data", defName2, csvFile, ...defaultArgs]) | ||
.command<GlobalOptions>(mod) | ||
.parse(); | ||
|
||
const res = consoleLog.getCall(0).firstArg; | ||
const successRes = res.match(/^(.*)$/m)[1]; | ||
|
||
equal(successRes, `successfully inserted 2 rows into ${defName2}`); | ||
}); | ||
|
||
test("fails with wrong headers", async function () { | ||
const csvStr = `not_id,not_val\n1,test_value\n`; | ||
const csvFile = await temporaryWrite(csvStr, { extension: "csv" }); | ||
|
||
const consoleError = spy(logger, "error"); | ||
const stdin = mockStd.stdin(); | ||
setTimeout(() => { | ||
stdin.send("y\n"); | ||
}, 1000); | ||
await yargs(["import-data", defName2, csvFile, ...defaultArgs]) | ||
.command<GlobalOptions>(mod) | ||
.parse(); | ||
|
||
const res = consoleError.getCall(0).firstArg; | ||
const regex = new RegExp(`table ${table2} has no column named not_id`); | ||
match(res.toString(), regex); | ||
}); | ||
|
||
test("fails with mismatched header and row length", async function () { | ||
let csvStr = `id\n1,test_value\n`; | ||
let csvFile = await temporaryWrite(csvStr, { extension: "csv" }); | ||
|
||
const consoleError = spy(logger, "error"); | ||
const stdin = mockStd.stdin(); | ||
setTimeout(() => { | ||
stdin.send("y\n"); | ||
}, 1000); | ||
await yargs(["import-data", defName2, csvFile, ...defaultArgs]) | ||
.command<GlobalOptions>(mod) | ||
.parse(); | ||
|
||
let res = consoleError.getCall(0).firstArg; | ||
const regex = /Invalid Record Length/; | ||
match(res.toString(), regex); | ||
|
||
csvStr = `id,val\n1\n`; | ||
csvFile = await temporaryWrite(csvStr, { extension: "csv" }); | ||
setTimeout(() => { | ||
stdin.send("y\n"); | ||
}, 1000); | ||
await yargs(["import-data", defName2, csvFile, ...defaultArgs]) | ||
.command<GlobalOptions>(mod) | ||
.parse(); | ||
|
||
res = consoleError.getCall(0).firstArg; | ||
match(res.toString(), regex); | ||
}); | ||
}); |
Review comment: since we can't assume the type (e.g., a JS number could be a TEXT column), we have to map the schema col types to the CSV headers (see the update in utils).
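To illustrate that closing point, a small hypothetical example (values invented) of the schema-driven quoting: a numeric-looking CSV value bound for a TEXT column must still be quoted, which is exactly what the header-to-type mapping enables.

// everything parsed from a CSV arrives as a string, so the column type,
// not the JS value, has to decide the quoting
const schemaWithIdx = [
  { idx: 0, name: "id", type: "integer" },
  { idx: 1, name: "val", type: "text" },
];
const row = ["1", "123"];

const values = row.map((val, idx) => {
  const type = schemaWithIdx.find((col) => col.idx === idx)?.type;
  if (typeof val === "string" && val.length === 0) return "NULL";
  if (type === "int" || type === "integer") return val;
  return `'${val}'`;
});

console.log(`(${values.join(",")})`); // => (1,'123')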