Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: loading of duckdb every query #4903

Merged
merged 4 commits into from
Jan 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 94 additions & 51 deletions webui/src/pages/repositories/repository/fileRenderers/data.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import React, {FC, useEffect, useState} from "react";
import React, {FC, FormEvent, useCallback, useEffect, useRef, useState} from "react";
import {Error, Loading} from "../../../../lib/components/controls";
import {withConnection} from "./duckdb";
import {Table} from "react-bootstrap";
import Alert from "react-bootstrap/Alert";
import {getConnection} from "./duckdb";
import * as duckdb from '@duckdb/duckdb-wasm';
import * as arrow from 'apache-arrow';
import Form from "react-bootstrap/Form";
import Button from "react-bootstrap/Button";
import {DatabaseIcon} from "@primer/octicons-react";
import dayjs from "dayjs";
import {RendererComponent} from "./types";
import Table from "react-bootstrap/Table";


const MAX_RESULTS_RETURNED = 1000;

export const DataLoader: FC = () => {
return <Loading/>
Expand All @@ -26,91 +30,130 @@ LIMIT 20`
FROM read_csv(lakefs_object('${repoId}', '${refId}', '${path}'), DELIM='\t', AUTO_DETECT=TRUE)
LIMIT 20`
}
const [query, setQuery] = useState<string>(initialQuery)
const [shouldSubmit, setShouldSubmit] = useState<boolean>(true)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const [data, setData] = useState<any[] | null>(null);
const [data, setData] = useState<arrow.Table<any> | null>(null);
const [error, setError] = useState<string | null>(null)
const [loading, setLoading] = useState<boolean>(false)

const sql = useRef<HTMLTextAreaElement>(null);

const handleSubmit = useCallback((event: FormEvent<HTMLFormElement>) => {
event.preventDefault()
setShouldSubmit(prev => !prev)
}, [setShouldSubmit])

useEffect(() => {
withConnection(async conn => {
const results = await conn.query(query)
const data = results.toArray()
setData(data)
if (!sql || !sql.current)
return
const runQuery = async (sql: string) => {
setLoading(true)
setError(null)
}).catch(e => {
setError(e.toString())
})
let conn: duckdb.AsyncDuckDBConnection | null
try {
conn = await getConnection()
} catch (e) {
setData(null)
setError(e.toString())
setLoading(false)
return
}

try {
const results = await conn.query(sql)
setData(results)
setError(null)
} catch (e) {
setError(e.toString())
setData(null)
} finally {
setLoading(false)
if (conn !== null)
await conn.close()
}
}
runQuery(sql.current.value).catch(console.error);
}, [repoId, refId, path, shouldSubmit])

let content;
const button = (
<Button type="submit" variant="success" disabled={loading}>
<DatabaseIcon /> {" "}
{ loading ? "Executing..." : "Execute" }
</Button>
);

if (error) {
content = <Error error={error}/>
} else if (data === null) {
content = <DataLoader/>
} else {
if (!data || data.length === 0) {

if (!data || data.numRows === 0) {
content = (
<p className={"text-md-center mt-5 mb-5"}>
<p className="text-md-center mt-5 mb-5">
No rows returned.
</p>
)
} else {
const totalRows = data.length
const fields = data.schema.fields
const totalRows = data.numRows
let res = data;
if (totalRows > 100) {
res = data.slice(0, 100)
if (totalRows > MAX_RESULTS_RETURNED) {
res = data.slice(0, MAX_RESULTS_RETURNED)
}
content = (
<>
<Table striped bordered hover size={"sm"} responsive={"sm"}>
<thead>
<tr>
{Object.getOwnPropertyNames(res[0]).map(name =>
<th key={name}>{name}</th>
)}
</tr>
</thead>
<tbody>
{res.map((row, i) => (
<tr key={`row-${i}`}>
{Object.getOwnPropertyNames(res[0]).map(name => (
<DataRow key={`row-${i}-${name}`} value={row[name]}/>
))}
</tr>
))}
</tbody>
</Table>
{(res.length < data.length) &&
<Alert>{`Showing only the first ${res.length} rows (out of ${data.length})`}</Alert>
{(res.numRows < data.numRows) &&
<small>{`Showing only the first ${res.numRows.toLocaleString()} rows (out of ${data.numRows.toLocaleString()})`}</small>
}
<div className="object-viewer-sql-results">
<Table striped bordered hover size={"sm"} responsive={"sm"}>
<thead className="thead-dark">
<tr>
{fields.map((field, i) =>
<th key={i}>
{field.name}
<br/>
<small>{field.type.toString()}</small>
</th>
)}
</tr>
</thead>
<tbody>
{[...res].map((row, i) => (
<tr key={`row-${i}`}>
{[...row].map((v, j: number) => {
return (
<DataRow key={`col-${i}-${j}`} value={v[1]}/>
)

})}
</tr>
))}
</tbody>
</Table>
</div>
</>
)
}
}


return (
<div>
<Form onSubmit={(e) => {
e.preventDefault()
setShouldSubmit(!shouldSubmit)
}}>
<Form onSubmit={handleSubmit}>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like! 🤩

<Form.Group className="mb-2 mt-2" controlId="objectQuery">
<Form.Control as="textarea" className="object-viewer-sql-input" rows={5} value={query} spellCheck={false} onChange={e => {
setQuery(e.target.value)
}} />
<Form.Control as="textarea" className="object-viewer-sql-input" rows={5} defaultValue={initialQuery} spellCheck={false} ref={sql} autoComplete="off"/>

<Form.Text className="text-muted align-right">
Powered by <a href="https://duckdb.org/2021/10/29/duckdb-wasm.html" target="_blank" rel="noreferrer">DuckDB-WASM</a>.
For a full SQL reference, see the <a href="https://duckdb.org/docs/sql/statements/select" target="_blank" rel="noreferrer">DuckDB Documentation</a>
</Form.Text>

</Form.Group>
<Button type="submit" variant={"success"} >
<DatabaseIcon/>{' '}
Execute
</Button>
{button}
</Form>
<div className={"mt-3 object-viewer-sql-results"}>
<div className="mt-3">
{content}
</div>
</div>
Expand Down
101 changes: 76 additions & 25 deletions webui/src/pages/repositories/repository/fileRenderers/duckdb.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,54 @@ import duckdb_wasm_eh from '@duckdb/duckdb-wasm/dist/duckdb-eh.wasm?url';
import eh_worker from '@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js?url';


// Percent-encodes a string, based on the replacement rules on the percent-encoding MDN page:
// https://developer.mozilla.org/en-US/docs/Glossary/Percent-encoding
// A nicer implementation using list comprehensions and printf('%x') to convert
// from Unicode code points to hex was attempted, but DuckDB didn't seem to evaluate
// lambdas and list comprehensions when passing a macro to a table function such as
// read_parquet() or read_csv(). Issue: https://github.com/duckdb/duckdb/issues/5821
// So - string replacements it is.
// SQL that defines the DuckDB macro p_encode(s), built from nested replace() calls.
// '%' is handled by the innermost call, i.e. it is replaced first, so the '%' characters
// introduced by the outer replacements are not themselves re-encoded.
// NOTE(review): '*' is not replaced by this macro - confirm whether that is intentional.
const URL_ENCODE_MACRO_SQL = `
CREATE MACRO p_encode(s) AS
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(s, '%', '%25'),
'/', '%2F'),
'?', '%3F'),
'#', '%23'),
'[', '%5B'),
']', '%5D'),
'@', '%40'),
'!', '%21'),
'$', '%24'),
'&', '%26'),
'''', '%27'),
'(', '%28'),
')', '%29'),
'+', '%2B'),
',', '%2C'),
';', '%3B'),
'=','%3D'),
' ', '%20'),
':', '%3A');
`


const MANUAL_BUNDLES: duckdb.DuckDBBundles = {
mvp: {
Expand All @@ -16,33 +64,36 @@ const MANUAL_BUNDLES: duckdb.DuckDBBundles = {
mainWorker: eh_worker,
},
};
// Select a bundle based on browser checks
let _bundle: duckdb.DuckDBBundle;
let bundlePromise: Promise<duckdb.DuckDBBundle>;

function getBundle(): Promise<duckdb.DuckDBBundle> {
if (_bundle) return new Promise(() => _bundle)
if (!bundlePromise) {
bundlePromise = duckdb.selectBundle(MANUAL_BUNDLES)


// Module-level singletons: the initialized database and the worker backing it.
let _db: duckdb.AsyncDuckDB | null = null
let _worker: Worker | null = null
// Initialization that is in flight (or already completed). Caching the promise rather than
// only the resolved instance ensures that callers arriving while the first initialization
// is still awaiting share it, instead of each spawning their own worker and database.
let _dbPromise: Promise<duckdb.AsyncDuckDB> | null = null

/**
 * Spawns the DuckDB worker, instantiates the database and registers the
 * `p_encode` and `lakefs_object` SQL macros.
 *
 * @returns the fully initialized database
 * @throws if no worker bundle is available, or if instantiation / macro creation fails;
 *         in the latter case the worker is terminated before the error propagates.
 */
async function initDB(): Promise<duckdb.AsyncDuckDB> {
    const bundle = await duckdb.selectBundle(MANUAL_BUNDLES)
    if (!bundle.mainWorker) {
        throw Error("could not initialize DuckDB")
    }
    const worker = new Worker(bundle.mainWorker);
    try {
        const db = new duckdb.AsyncDuckDB(new duckdb.VoidLogger(), worker);
        await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
        const conn = await db.connect()
        try {
            // await conn.query(`SET access_mode = READ_ONLY`)
            await conn.query(URL_ENCODE_MACRO_SQL)
            await conn.query(`
CREATE MACRO lakefs_object(repoId, refId, path) AS
'${document.location.protocol}//${document.location.host}/api/v1/repositories/' ||
p_encode(repoId) || '/refs/' || p_encode(refId) || '/objects?path=' || p_encode(path);
`)
        } finally {
            // Close the setup connection even if a macro definition failed.
            await conn.close()
        }
        // Publish the singletons only once initialization fully succeeded.
        _worker = worker
        _db = db
        return db
    } catch (e) {
        // Do not leak the worker when initialization fails part-way.
        worker.terminate()
        throw e
    }
}

/**
 * Returns the shared DuckDB instance, initializing it on first use.
 *
 * Concurrent callers share a single initialization. If initialization fails,
 * the cached promise is cleared so that a later call can retry.
 *
 * @returns the shared, initialized database
 * @throws whatever the underlying initialization throws
 */
async function getDB(): Promise<duckdb.AsyncDuckDB> {
    if (_db) {
        return _db
    }
    if (!_dbPromise) {
        _dbPromise = initDB().catch((e) => {
            // Allow a retry on the next call instead of caching the failure forever.
            _dbPromise = null
            throw e
        })
    }
    return _dbPromise
}

export async function withConnection(cb: (conn: duckdb.AsyncDuckDBConnection) => void) {
export async function getConnection(): Promise<duckdb.AsyncDuckDBConnection> {
// Instantiate the async version of DuckDB-wasm
const bundle = await getBundle();
if (!bundle.mainWorker) {
throw Error("could not initialize DuckDB")
}
const worker = new Worker(bundle.mainWorker);
const logger = new duckdb.VoidLogger();
const db = new duckdb.AsyncDuckDB(logger, worker);
await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
const conn = await db.connect()
await conn.query(`CREATE MACRO lakefs_object(repoId, refId, path) AS '${document.location.protocol}//${document.location.host}/api/v1/repositories/' || repoId || '/refs/' || refId || '/objects?path=' || replace(path, '/', '%2F');`)
const results = await cb(conn)
await conn.close()
await db.terminate()
await worker.terminate()
return results
const db = await getDB()
return await db.connect()
}
7 changes: 4 additions & 3 deletions webui/src/styles/globals.css
Original file line number Diff line number Diff line change
Expand Up @@ -594,10 +594,11 @@ td .form-group {
color: #FFFFFF;
}
.object-viewer-sql-results {
max-height: 350px;
overflow: auto;
font-size: .7rem;
font-size: .75rem;
max-height: 400px;
}

.image-container {
padding: 30px;
}
Expand All @@ -615,4 +616,4 @@ td .form-group {

.required-field-label {
color: red;
}
}