Skip to content

Commit

Permalink
Merge pull request #1711 from Skydodle/1666-pull-data-from-previous-y…
Browse files Browse the repository at this point in the history
…ears

1666 pull data from previous years
  • Loading branch information
Skydodle authored May 9, 2024
2 parents 54e7923 + 19e5bf5 commit 3a6fc04
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 44 deletions.
2 changes: 1 addition & 1 deletion components/Footer/LastUpdated.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function LastUpdated() {

useEffect(() => {
const getLastUpdated = async () => {
const getLastUpdatedSQL = 'select max(createddate) from requests;';
const getLastUpdatedSQL = 'select max(createddate) from requests_2024;';

const lastUpdatedAsArrowTable = await conn.query(getLastUpdatedSQL);
const results = ddbh.getTableData(lastUpdatedAsArrowTable);
Expand Down
31 changes: 28 additions & 3 deletions components/Map/RequestDetail.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,34 @@ function RequestDetail({
agencies,
// dispatchGetPinInfoRequest,
dispatchUpdatePinInfo,
startDate,
endDate,
}) {
const { conn } = useContext(DbContext);
const getPinInfo = useCallback(async () => {
if (!requestId) return;

try {
const getPinsInfoSQL = `SELECT * FROM requests WHERE TRIM(SRNumber) = '${requestId}'`;
const startYear = moment(startDate).year();
const endYear = moment(endDate).year();

let getPinsInfoSQL = '';

if (startYear === endYear) {
// If search date range is within the same year
const tableName = `requests_${startYear}`;
getPinsInfoSQL = `SELECT * FROM ${tableName} WHERE TRIM(SRNumber) = '${requestId}'`;
} else {
// If search date range is across two different years
const tableNameStartYear = `requests_${startYear}`;
const tableNameEndYear = `requests_${endYear}`;

getPinsInfoSQL = `
(SELECT * FROM ${tableNameStartYear} WHERE TRIM(SRNumber) = '${requestId}')
UNION ALL
(SELECT * FROM ${tableNameEndYear} WHERE TRIM(SRNumber) = '${requestId}')
`;
}

const pinsInfoAsArrowTable = await conn.query(getPinsInfoSQL);
const newPinsInfo = ddbh.getTableData(pinsInfoAsArrowTable);
Expand All @@ -76,12 +99,12 @@ function RequestDetail({
&& Array.isArray(newPinsInfo)
&& newPinsInfo.length > 0
) {
dispatchUpdatePinInfo(newPinsInfo[0]);
dispatchUpdatePinInfo(newPinsInfo[0]); // Assumes first entry is correct, adjust as needed
}
} catch (e) {
console.error('RequestDetail: Error occurred: ', e);
}
}, [requestId, conn, dispatchUpdatePinInfo]);
}, [requestId, conn, dispatchUpdatePinInfo, startDate, endDate]);

useEffect(() => {
async function fetchPins() {
Expand Down Expand Up @@ -244,6 +267,8 @@ const mapStateToProps = state => ({
pinsInfo: state.data.pinsInfo,
requestTypes: state.metadata.requestTypes,
agencies: state.metadata.agencies,
startDate: state.filters.startDate,
endDate: state.filters.endDate,
});

const mapDispatchToProps = dispatch => ({
Expand Down
101 changes: 73 additions & 28 deletions components/Map/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ class MapContainer extends React.Component {
position: props.position,
lastUpdated: props.lastUpdated,
selectedTypes: this.getSelectedTypes(),
acknowledgeModalShown: false
acknowledgeModalShown: false,
isTableLoading: false,
};

// We store the raw requests from the API call here, but eventually they are
Expand All @@ -66,13 +67,27 @@ class MapContainer extends React.Component {
}

createRequestsTable = async () => {
const { conn } = this.context;
this.setState({ isTableLoading: true });
const { conn, tableNameByYear } = this.context;
const startDate = this.props.startDate; // directly use the startDate prop transformed for redux store
const year = moment(startDate).year(); // extract the year
const datasetFileName = `requests${year}.parquet`;

// Create the 'requests' table.
// Create the table for the selected year if it does not already exist
const createSQL =
'CREATE TABLE requests AS SELECT * FROM "requests.parquet"'; // parquet

await conn.query(createSQL);
`CREATE TABLE IF NOT EXISTS ${tableNameByYear} AS SELECT * FROM "${datasetFileName}"`; // query from parquet

const startTime = performance.now(); // start the time tracker

try {
await conn.query(createSQL);
const endTime = performance.now() // end the timer
console.log(`Dataset registration & table creation (by year) time: ${Math.floor(endTime - startTime)} ms.`);
} catch (error) {
console.error("Error in creating table or registering dataset:", error);
} finally {
this.setState({ isTableLoading: false});
}
};

async componentDidMount(props) {
Expand All @@ -84,19 +99,22 @@ class MapContainer extends React.Component {

async componentDidUpdate(prevProps) {
const { activeMode, pins, startDate, endDate } = this.props;
function didDateRangeChange() {
// Check that endDate is not null since we only want to retrieve data
// when both the startDate and endDate are selected.
return (
(prevProps.startDate != startDate || prevProps.endDate != endDate) &&
endDate != null
);
}

// create conditions to check if year or startDate or endDate changed
const yearChanged = moment(prevProps.startDate).year() !== moment(startDate).year();
const startDateChanged = prevProps.startDate !== startDate;
const endDateChanged = prevProps.endDate !== endDate;

// Check that endDate is not null since we only want to retrieve data
// when both the startDate and endDate are selected.
const didDateRangeChange = (yearChanged || startDateChanged || endDateChanged) && endDate !== null;

if (
prevProps.activeMode !== activeMode ||
prevProps.pins !== pins ||
didDateRangeChange()
didDateRangeChange
) {
await this.createRequestsTable();
await this.setData();
}
}
Expand Down Expand Up @@ -291,27 +309,53 @@ class MapContainer extends React.Component {
return dateArray;
};

getAllRequests = async (startDate, endDate) => {
try {
const { conn } = this.context;
// To handle cross-year date ranges, we check if the startDate and endDate year are the same year
// if same year, we simply query from that year's table
// if different years, we query both startDate year and endDate year, then union the result

async getAllRequests(startDate, endDate) {
const { conn } = this.context;
const startYear = moment(startDate).year();
const endYear = moment(endDate).year();

let selectSQL = '';

// Execute a SELECT query from 'requests' table
const selectSQL = `SELECT * FROM requests WHERE CreatedDate between '${startDate}' and '${endDate}'`;
try {
if (startYear === endYear) {
// If the dates are within the same year, query that single year's table.
const tableName = `requests_${startYear}`;
selectSQL = `SELECT * FROM ${tableName} WHERE CreatedDate BETWEEN '${startDate}' AND '${endDate}'`;
} else {
// If the dates span multiple years, create two queries and union them.
const tableNameStartYear = `requests_${startYear}`;
const endOfStartYear = moment(startDate).endOf('year').format('YYYY-MM-DD');
const tableNameEndYear = `requests_${endYear}`;
const startOfEndYear = moment(endDate).startOf('year').format('YYYY-MM-DD');

selectSQL = `
(SELECT * FROM ${tableNameStartYear} WHERE CreatedDate BETWEEN '${startDate}' AND '${endOfStartYear}')
UNION ALL
(SELECT * FROM ${tableNameEndYear} WHERE CreatedDate BETWEEN '${startOfEndYear}' AND '${endDate}')
`;
}

const dataLoadStartTime = performance.now();
const requestsAsArrowTable = await conn.query(selectSQL);
const dataLoadEndTime = performance.now();

console.log(`Data loading time: ${Math.floor(dataLoadEndTime - dataLoadStartTime)} ms`);

const requests = ddbh.getTableData(requestsAsArrowTable);
const mapLoadEndTime = performance.now();

this.endTime = performance.now(); // end benchmark
console.log(`Map loading time: ${Math.floor(mapLoadEndTime - dataLoadEndTime)} ms`);

console.log(
`Time taken to bootstrap db: ${this.endTime - this.startTime}ms`
);
return requests;
} catch (e) {
console.error(e);
console.error("Error during database query execution:", e);
}
};
}


setData = async () => {
const { startDate, endDate, dispatchGetDbRequest, dispatchGetDataRequest } =
Expand Down Expand Up @@ -385,7 +429,7 @@ class MapContainer extends React.Component {
isMapLoading,
isDbLoading,
} = this.props;
const { ncCounts, ccCounts, selectedTypes, acknowledgeModalShown } = this.state;
const { ncCounts, ccCounts, selectedTypes, acknowledgeModalShown, isTableLoading } = this.state;
return (
<div className={classes.root}>
<Map
Expand All @@ -400,7 +444,7 @@ class MapContainer extends React.Component {
initialState={this.initialState}
/>
<CookieNotice />
{(isDbLoading || isMapLoading) ? (
{(isDbLoading || isMapLoading || isTableLoading) ? (
<>
<LoadingModal />
<FunFactCard />
Expand Down Expand Up @@ -448,6 +492,7 @@ MapContainer.propTypes = {};

MapContainer.defaultProps = {};

// connect MapContainer to Redux store
export default connect(
mapStateToProps,
mapDispatchToProps
Expand Down
2 changes: 1 addition & 1 deletion components/common/ReactDayPicker/ReactDayPicker.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ function ReactDayPicker({
onDayClick={handleDayClick}
onDayMouseEnter={handleDayMouseEnter}
weekdayElement={<WeekDay />}
fromMonth={new Date(2023, 12)}
fromMonth={new Date(2022, 12)}
/>
</>
);
Expand Down
7 changes: 6 additions & 1 deletion components/db/DbContext.jsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import React from 'react';

const DbContext = React.createContext();
const DbContext = React.createContext({
db: null,
conn: null,
worker: null,
tableNameByYear: '',
});

export default DbContext;
50 changes: 40 additions & 10 deletions components/db/DbProvider.jsx
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import React, { useEffect, useState } from 'react';
import PropTypes from 'proptypes';
import { connect } from 'react-redux';
import PropTypes from 'prop-types';
import * as duckdb from '@duckdb/duckdb-wasm';
import Worker from 'web-worker';
import DbContext from '@db/DbContext';
import moment from 'moment';

// List of remote dataset locations used by db.registerFileURL
const datasets = {
parquet: {
// huggingface
hfYtd:
'https://huggingface.co/datasets/311-data/2024/resolve/main/2024.parquet', // year-to-date
hfYtd2024:
'https://huggingface.co/datasets/311-data/2024/resolve/main/2024.parquet', // 2024 year-to-date
hfYtd2023:
'https://huggingface.co/datasets/311-data/2023/resolve/main/2023.parquet', // 2023 year-to-date
hfLastMonth:
'https://huggingface.co/datasets/edwinjue/311-data-last-month/resolve/refs%2Fconvert%2Fparquet/edwinjue--311-data-last-month/csv-train.parquet', // last month
},
Expand All @@ -20,10 +24,11 @@ const datasets = {
},
};

function DbProvider({ children }) {
function DbProvider({ children, startDate }) {
const [db, setDb] = useState(null);
const [conn, setConn] = useState(null);
const [worker, setWorker] = useState(null);
const [tableNameByYear, setTableNameByYear] = useState('');

useEffect(() => {
const dbInitialize = async () => {
Expand All @@ -49,14 +54,20 @@ function DbProvider({ children }) {

await newDb.instantiate(
DUCKDB_CONFIG.mainModule,
DUCKDB_CONFIG.pthreadWorker
DUCKDB_CONFIG.pthreadWorker,
);

// register parquet
await newDb.registerFileURL(
'requests.parquet',
datasets.parquet.hfYtd,
4 // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
'requests2024.parquet',
datasets.parquet.hfYtd2024,
4, // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
);

await newDb.registerFileURL(
'requests2023.parquet',
datasets.parquet.hfYtd2023,
4,
);

// Create db connection
Expand Down Expand Up @@ -100,24 +111,43 @@ function DbProvider({ children }) {
// Important: dependency array must be empty or you will get the following error
// "cannot send a message since the worker is not set" and app will infinite loop

// This useEffect specifically handles dynamic table name generation,
// separated from the previous useEffect that handles db initialization and teardown
useEffect(() => {
if (startDate) {
const year = moment(startDate).year();
setTableNameByYear(`requests_${year}`);
}
}, [startDate]); // Depend on startDate

// block until db, conn, worker are available
if (!db || !conn || !worker) {
return null;
}

return (
<DbContext.Provider value={{ db, conn, worker }}>
<DbContext.Provider value={{
db, conn, worker, tableNameByYear,
}}
>
{children}
</DbContext.Provider>
);
}

// Runtime prop validation for DbProvider.
DbProvider.propTypes = {
// React subtree rendered inside DbContext.Provider once db, conn and worker are set.
children: PropTypes.node,
// Start of the selected date range; mapped from state.filters.startDate by the
// Redux connect wrapper and used to derive the `requests_<year>` table name.
startDate: PropTypes.string,
};

// Defaults applied when the corresponding prop is not supplied.
DbProvider.defaultProps = {
children: null,
// A null startDate leaves tableNameByYear at its initial empty string,
// because the table-name effect only runs its update when startDate is truthy.
startDate: null,
};

export default DbProvider;
/**
 * Redux selector for DbProvider: exposes the filter start date as the
 * `startDate` prop. No other part of the store is read.
 *
 * @param {object} state - Redux root state; must contain a `filters` slice.
 * @returns {{startDate: *}} Props object holding only `startDate`.
 */
const mapStateToProps = ({ filters }) => {
  const { startDate } = filters;
  return { startDate };
};

export default connect(mapStateToProps)(DbProvider);

0 comments on commit 3a6fc04

Please sign in to comment.