Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1666 pull data from previous years #1711

Merged
merged 21 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5856018
Set up 2023 & 2024 parquet file path and pointers
Skydodle Apr 13, 2024
0bb9b1c
Update 2024 file name in SQL query
Skydodle Apr 13, 2024
780f5a1
Clarifying query comment
Skydodle Apr 13, 2024
e11c8a2
Renabled 2023 selection on day picker
Skydodle Apr 13, 2024
2491cf2
Used startDate prop to extract year and query dataset file by year dy…
Skydodle Apr 13, 2024
040bb2d
Fix: add year changed check to set new table
Skydodle Apr 14, 2024
d369db2
Fix: update table name
Skydodle Apr 14, 2024
ad21b02
Fix: add isTableLoading state to have loading model cover table creat…
Skydodle Apr 14, 2024
60a2d26
docs: add comment for dynamic table creation by year and year
Skydodle Apr 15, 2024
21e3f79
test: add log to track table creation time
Skydodle Apr 15, 2024
e2c93d9
refactor: move dynamic table name logic to DbContext so it can be acc…
Skydodle Apr 15, 2024
fdc8ec9
refactor: use context managed `tableNameByYear` instead of local stat…
Skydodle Apr 15, 2024
e70bcb8
fix: update table name with dynamic context state `tableNameByYear`
Skydodle Apr 15, 2024
9cfe1ab
style: remove unused comment out code
Skydodle Apr 15, 2024
4e8f9f4
Refactor: remove unused variable
Skydodle Apr 19, 2024
7f8406b
Merge branch 'main' of https://github.com/hackforla/311-data into 166…
Skydodle Apr 21, 2024
8898544
fix: handle cross-year data request with union of two table queries
Skydodle Apr 30, 2024
661a266
fix: add query to handle cross-year search date for pin details
Skydodle Apr 30, 2024
f9c091b
refactor: clean up old code and added comment explanation
Skydodle May 3, 2024
e0859fe
feat: add time performance console logs for table creation, data load…
Skydodle May 9, 2024
19e5bf5
fix: update correct proptypes
Skydodle May 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/Footer/LastUpdated.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function LastUpdated() {

useEffect(() => {
const getLastUpdated = async () => {
const getLastUpdatedSQL = 'select max(createddate) from requests;';
const getLastUpdatedSQL = 'select max(createddate) from requests_2024;';

const lastUpdatedAsArrowTable = await conn.query(getLastUpdatedSQL);
const results = ddbh.getTableData(lastUpdatedAsArrowTable);
Expand Down
4 changes: 2 additions & 2 deletions components/Map/RequestDetail.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ function RequestDetail({
// dispatchGetPinInfoRequest,
dispatchUpdatePinInfo,
}) {
const { conn } = useContext(DbContext);
const { conn, tableNameByYear } = useContext(DbContext);
const getPinInfo = useCallback(async () => {
try {
const getPinsInfoSQL = `SELECT * FROM requests WHERE TRIM(SRNumber) = '${requestId}'`;
const getPinsInfoSQL = `SELECT * FROM ${tableNameByYear} WHERE TRIM(SRNumber) = '${requestId}'`;

const pinsInfoAsArrowTable = await conn.query(getPinsInfoSQL);
const newPinsInfo = ddbh.getTableData(pinsInfoAsArrowTable);
Expand Down
58 changes: 39 additions & 19 deletions components/Map/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class MapContainer extends React.Component {
position: props.position,
lastUpdated: props.lastUpdated,
selectedTypes: this.getSelectedTypes(),
isTableLoading: false,
};

// We store the raw requests from the API call here, but eventually they are
Expand All @@ -65,13 +66,27 @@ class MapContainer extends React.Component {
}

// Creates the table named by the context's `tableNameByYear` from the parquet
// file matching the year of the `startDate` prop, setting `isTableLoading` in
// component state for the duration of the query.
// NOTE(review): this listing appears to come from a diff view, so superseded
// statements and their replacements are both shown below — confirm against the
// actual file before editing.
// NOTE(review): the table name comes from context while the parquet file name
// is derived here from `startDate`; confirm both always refer to the same year.
createRequestsTable = async () => {
const { conn } = this.context;
this.setState({ isTableLoading: true });
const { conn, tableNameByYear } = this.context;
const startDate = this.props.startDate; // directly use the startDate prop transformed for redux store
const year = moment(startDate).year(); // extract the year
const datasetFileName = `requests${year}.parquet`;

// Create the 'requests' table.
// Create the year's data table if it does not already exist
const createSQL =
'CREATE TABLE requests AS SELECT * FROM "requests.parquet"'; // parquet

await conn.query(createSQL);
`CREATE TABLE IF NOT EXISTS ${tableNameByYear} AS SELECT * FROM "${datasetFileName}"`; // query from parquet

const startTime = performance.now(); // start the time tracker

try {
await conn.query(createSQL);
const endTime = performance.now() // end the timer
console.log(`Table created and dataset registered. Time taken: ${Math.floor(endTime - startTime)} ms.`);
} catch (error) {
// The error is logged but not rethrown, so callers continue as if the table exists.
console.error("Error in creating table or registering dataset:", error);
} finally {
// Always clear the loading flag, whether the query succeeded or failed.
this.setState({ isTableLoading: false});
}
};

async componentDidMount(props) {
Expand All @@ -83,19 +98,22 @@ class MapContainer extends React.Component {

// After each update, re-creates the requests table and reloads the data when
// the active mode, the pins, or the selected date range changed.
// NOTE(review): this listing appears to come from a diff view, so the earlier
// `didDateRangeChange` helper function and the const that replaces it are both
// shown below — confirm against the actual file before editing.
async componentDidUpdate(prevProps) {
const { activeMode, pins, startDate, endDate } = this.props;
function didDateRangeChange() {
// Check that endDate is not null since we only want to retrieve data
// when both the startDate and endDate are selected.
return (
(prevProps.startDate != startDate || prevProps.endDate != endDate) &&
endDate != null
);
}

// create conditions to check if year or startDate or endDate changed
// NOTE(review): if the year of startDate changed then startDate itself changed,
// so `yearChanged` looks redundant next to `startDateChanged` — confirm.
const yearChanged = moment(prevProps.startDate).year() !== moment(startDate).year();
const startDateChanged = prevProps.startDate !== startDate;
const endDateChanged = prevProps.endDate !== endDate;

// Check that endDate is not null since we only want to retrieve data
// when both the startDate and endDate are selected.
// NOTE(review): `!== null` lets an undefined endDate through, unlike the
// loose `!= null` in the helper above — confirm endDate is never undefined.
const didDateRangeChange = (yearChanged || startDateChanged || endDateChanged) && endDate !== null;

if (
prevProps.activeMode !== activeMode ||
prevProps.pins !== pins ||
didDateRangeChange()
didDateRangeChange
) {
// The table is (re)created first, then the data is loaded.
await this.createRequestsTable();
await this.setData();
}
}
Expand Down Expand Up @@ -292,10 +310,11 @@ class MapContainer extends React.Component {

getAllRequests = async (startDate, endDate) => {
try {
const { conn } = this.context;
const { conn, tableNameByYear } = this.context;
const year = moment(startDate).year();

// Execute a SELECT query from 'requests' table
const selectSQL = `SELECT * FROM requests WHERE CreatedDate between '${startDate}' and '${endDate}'`;
const selectSQL = `SELECT * FROM ${tableNameByYear} WHERE CreatedDate between '${startDate}' and '${endDate}'`;

const requestsAsArrowTable = await conn.query(selectSQL);

Expand All @@ -304,7 +323,7 @@ class MapContainer extends React.Component {
this.endTime = performance.now(); // end benchmark

console.log(
`Time taken to bootstrap db: ${this.endTime - this.startTime}ms`
`Time taken to bootstrap db: ${Math.floor(this.endTime - this.startTime)} ms`
);
return requests;
} catch (e) {
Expand Down Expand Up @@ -380,7 +399,7 @@ class MapContainer extends React.Component {
isMapLoading,
isDbLoading,
} = this.props;
const { ncCounts, ccCounts, selectedTypes } = this.state;
const { ncCounts, ccCounts, selectedTypes, isTableLoading } = this.state;
return (
<div className={classes.root}>
<Map
Expand All @@ -395,7 +414,7 @@ class MapContainer extends React.Component {
initialState={this.initialState}
/>
<CookieNotice />
{(isDbLoading || isMapLoading) ? (
{(isDbLoading || isMapLoading || isTableLoading) ? (
<>
<LoadingModal />
<FunFactCard />
Expand Down Expand Up @@ -443,6 +462,7 @@ MapContainer.propTypes = {};

MapContainer.defaultProps = {};

// connect MapContainer to Redux store
export default connect(
mapStateToProps,
mapDispatchToProps
Expand Down
2 changes: 1 addition & 1 deletion components/common/ReactDayPicker/ReactDayPicker.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ function ReactDayPicker({
onDayClick={handleDayClick}
onDayMouseEnter={handleDayMouseEnter}
weekdayElement={<WeekDay />}
fromMonth={new Date(2023, 12)}
fromMonth={new Date(2022, 12)}
/>
</>
);
Expand Down
7 changes: 6 additions & 1 deletion components/db/DbContext.jsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import React from 'react';

const DbContext = React.createContext();
// Default context value, used only when a consumer renders outside of a
// <DbContext.Provider>. The keys match the value DbProvider supplies
// ({ db, conn, worker, tableNameByYear }) so that destructuring
// `tableNameByYear` from the default yields '' rather than undefined.
const DbContext = React.createContext({
  db: null,
  conn: null,
  worker: null,
  tableNameByYear: '',
});

export default DbContext;
50 changes: 40 additions & 10 deletions components/db/DbProvider.jsx
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import React, { useEffect, useState } from 'react';
import PropTypes from 'proptypes';
import { connect } from 'react-redux';
import PropTypes from 'prop-types';
import * as duckdb from '@duckdb/duckdb-wasm';
import Worker from 'web-worker';
import DbContext from '@db/DbContext';
import moment from 'moment';

// List of remote dataset locations used by db.registerFileURL
const datasets = {
parquet: {
// huggingface
hfYtd:
'https://huggingface.co/datasets/311-data/2024/resolve/main/2024.parquet', // year-to-date
hfYtd2024:
'https://huggingface.co/datasets/311-data/2024/resolve/main/2024.parquet', // 2024 year-to-date
hfYtd2023:
'https://huggingface.co/datasets/311-data/2023/resolve/main/2023.parquet', // 2023 year-to-date
hfLastMonth:
'https://huggingface.co/datasets/edwinjue/311-data-last-month/resolve/refs%2Fconvert%2Fparquet/edwinjue--311-data-last-month/csv-train.parquet', // last month
},
Expand All @@ -20,10 +24,11 @@ const datasets = {
},
};

function DbProvider({ children }) {
function DbProvider({ children, startDate }) {
const [db, setDb] = useState(null);
const [conn, setConn] = useState(null);
const [worker, setWorker] = useState(null);
const [tableNameByYear, setTableNameByYear] = useState('');

useEffect(() => {
const dbInitialize = async () => {
Expand All @@ -49,14 +54,20 @@ function DbProvider({ children }) {

await newDb.instantiate(
DUCKDB_CONFIG.mainModule,
DUCKDB_CONFIG.pthreadWorker
DUCKDB_CONFIG.pthreadWorker,
);

// register parquet
await newDb.registerFileURL(
'requests.parquet',
datasets.parquet.hfYtd,
4 // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
'requests2024.parquet',
datasets.parquet.hfYtd2024,
4, // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
);

await newDb.registerFileURL(
'requests2023.parquet',
datasets.parquet.hfYtd2023,
4,
);

// Create db connection
Expand Down Expand Up @@ -100,24 +111,43 @@ function DbProvider({ children }) {
// Important: dependency array must be empty or you will get the following error
// "cannot send a message since the worker is not set" and app will infinite loop

// This useEffect specifically handles dynamic table name generation,
// separated from the previous useEffect that handles db initialization and teardown
useEffect(() => {
if (startDate) {
const year = moment(startDate).year();
setTableNameByYear(`requests_${year}`);
}
}, [startDate]); // Depend on startDate

// block until db, conn, worker are available
if (!db || !conn || !worker) {
return null;
}

return (
<DbContext.Provider value={{ db, conn, worker }}>
<DbContext.Provider value={{
db, conn, worker, tableNameByYear,
}}
>
{children}
</DbContext.Provider>
);
}

// Prop validation: `children` is any renderable node; `startDate` is the
// date string supplied by the Redux store.
DbProvider.propTypes = {
  children: PropTypes.node,
  startDate: PropTypes.string,
};

// Default prop VALUES (not validators). A PropTypes validator is a truthy
// function, so using one as the default would pass the `if (startDate)` guard
// in DbProvider and be handed to moment() as if it were a date.
DbProvider.defaultProps = {
  children: null,
  startDate: null,
};

export default DbProvider;
// connect DbProvider to Redux to get startDate
// Selects the filter start date from the Redux state and exposes it as the
// `startDate` prop.
const mapStateToProps = ({ filters }) => {
  const { startDate } = filters;
  return { startDate };
};

export default connect(mapStateToProps)(DbProvider);