- create_tables and seed_tables scripts working
- added pg-native in order to run these queries synchronously
- realized that the comments were being cut off, so added special parsing
tanmibts committed Sep 6, 2020
1 parent 3e35f46 commit 8f5acb0
Showing 4 changed files with 142 additions and 93 deletions.
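For context on the first commit bullet, the synchronous pg-native flow that both `create_tables.js` and `seed_table.js` now follow looks roughly like this (a minimal sketch; the connection string is the one used in the scripts below, and the sample query and printed result are illustrative only):

```js
// Minimal sketch of the synchronous pg-native pattern adopted in this commit.
// The connection string mirrors the one in create_tables.js / seed_table.js.
const Client = require("pg-native");

const client = new Client();

// connectSync blocks until the connection is open, or throws on failure.
client.connectSync(
  "dbname=treedb user=trees host=localhost password=trees port=5432 connect_timeout=10"
);

// querySync blocks until the statement finishes and returns an array of row objects.
// The parameterized form, client.querySync(text, values), is what the seed script uses.
const rows = client.querySync("SELECT count(*) AS total FROM treedata");
console.log(rows); // e.g. [ { total: '2277' } ] after seeding (count taken from seed_table.js's comment)

// Close the libpq connection when done (the scripts below simply let the process exit).
client.end();
```

This trades the pooled, callback-based `pg` client for blocking calls, which keeps table creation and row insertion strictly ordered.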
8 changes: 5 additions & 3 deletions README.md
@@ -23,9 +23,11 @@ Later, you can stop Postgres with `brew services stop postgresql`.

3. Load database.

- Create database: `node server/db/create_db_trees.js local`. NOTE: This script isn't working yet; please open the file and follow the manual instructions.
- Create tables: `node server/db/create_tables.js local`
- Seed tables: `node server/db/seed_table.js local`
`cd server/db`

- Create database: `node create_db_trees.js local`. NOTE: This script isn't working yet; please open the file and follow the manual instructions.
- Create tables: `node create_tables.js local`
- Seed tables: `node seed_table.js local`

4. Run the server: `node server/server-api.js local`

1 change: 1 addition & 0 deletions package.json
@@ -28,6 +28,7 @@
"morgan": "^1.9.1",
"node-fetch": "^1.7.3",
"pg": "^8.1.0",
"pg-native": "^3.0.0",
"pgtools": "^0.3.0",
"validator": "^11.1.0",
"winston": "^3.2.1"
44 changes: 23 additions & 21 deletions server/db/create_tables.js
@@ -1,7 +1,8 @@
// Schema created by Wes Warren, Ajay Anand, Victoria Tan, Rose Meyers, Steve

const env = process.argv[2] || "dev";
const { Pool } = require("pg");
// We are using pg-native for this script only, in order to create the tables synchronously
const Client = require("pg-native");

/*
README:
@@ -84,8 +85,8 @@ const QUERIES = [
url VARCHAR(255),
urlimage VARCHAR(255),
status VARCHAR(255),
notes VARCHAR(255),
createdAt TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
notes TEXT,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(id_tree),
UNIQUE (who, ref, lng, lat) -- TODO: victoria: there seem to be actual duplicates that violate this constraint. is this constraint false?
);`,
@@ -96,41 +97,42 @@
queryString: `CREATE TABLE IF NOT EXISTS treehistory(
id_treehistory INT GENERATED ALWAYS AS IDENTITY,
id_tree integer,
lng FLOAT8, --TODO: victoria: I don't think we need this (duplicate information)
lat FLOAT8, --TODO: victoria: I don't think we need this (duplicate information)
common VARCHAR(255), --TODO: victoria: I don't think we need this (duplicate information)
watered VARCHAR(255), -- TODO: victoria: should these be booleans? ditto the others as well
watered VARCHAR(255),
mulched VARCHAR(255),
pruned VARCHAR(255),
staked VARCHAR(255),
braced VARCHAR(255),
weeded VARCHAR(255),
comment VARCHAR(255),
comment TEXT,
volunteer VARCHAR(255),
datevisit TIMESTAMP, --TODO: victoria: I don't think we need this; what's the difference between createAt and datevisit?
createdAt TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
date_visit TIMESTAMP, -- equal to created_at for non-seed entries
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- For manual debugging use only
PRIMARY KEY(id_treehistory),
CONSTRAINT fk_treedata FOREIGN KEY(id_tree) REFERENCES treedata(id_tree)
);`,
},
];

const main = () => {
const pool = new Pool(config);
const client = new Client();
try {
client.connectSync(
"dbname=treedb user=trees host=localhost password=trees port=5432 connect_timeout=10"
);
} catch (err) {
throw err;
}

console.log("Creating all tables...");
QUERIES.filter((query) => query.apply).forEach((query) => {
pool.query(query.queryString, (err, res) => {
console.log(`${query.description}:`);
if (err) {
console.error("Error: ", err);
process.exit(-1);
}
console.log("Response: ", res);
});
console.log(query.description);
try {
client.querySync(query.queryString);
} catch (err) {
console.error("Error: ", err);
}
});

console.log("Done creating tables.");
pool.end();
};

main();
182 changes: 113 additions & 69 deletions server/db/seed_table.js
@@ -1,68 +1,99 @@
const fs = require("fs");
const Pool = require("pg").Pool;
const fastcsv = require("fast-csv");
// We are using pg-native for this script only, in order to seed the table synchronously
const Client = require("pg-native");

/*
README
After running this script, verify the row counts below as a confidence check.
(Please Update this accordingly if new sources are added.)
(Please update this accordingly if new sources are added.)
TODO: victoria not sure if these numbers are right because of the possible duplicates
> treedb=# select count(*) from treedata;
>treedb=# select count(*) from treedata;
count
-------
2253
2277
(1 row)
> treedb=# select count(*) from treehistory;
>treedb=# select count(*) from treehistory;
count
-------
739
907
(1 row)
*/

const insertRows = (pathToCsv, queryString, pool) => {
const insertRow = (params, client) => {
client.query(queryString, params, (err, res) => {
if (err) {
console.log("Error during row insertion: ", err.stack);
console.log("Faulty row is ", params);
} else {
console.log(
`Inserted row from ${pathToCsv}: ${params[0]}, ${params[1]} ${params[2]}`
);
}
});
};

const insertRows = (
pathToCsv,
queryString,
client,
expectedNumParams,
modifyRow
) => {
let csvData = [];
const csvStream = fastcsv
.parse({ quote: false })
.on("data", (data) => {
csvData.push(data);
})
.on("end", () => {
// Remove the first line: header
csvData.shift();

pool.connect((err, client, done) => {
if (err) throw err;
try {
csvData.forEach((params) => {
insertRow(params, client);
});
} finally {
done();
fs.readFileSync(pathToCsv)
.toString()
.split("\n")
.forEach((row) => {
let params = row.toString().replace("\r", "").split(",");

modifyRow(params);

/*
Some comments in the csv contain commas. This leads to errors such as the following:
[ From oakland_trees_maintenance_2019.csv ]
Error: ERROR: bind message supplies 13 parameters, but prepared statement "" requires 12
Faulty row is [
'-122.188109',
'37.764347',
'Autumn Blaze maple',
'2019-07-15 11:00:00',
'WB',
'yes',
'yes',
'yes',
'no',
'no',
'yes',
'"Plants',
' owners mulched"'
]
Since the comment field is last, we can safely assume all the columns past
the expected number of parameters are just part of the original comment.
We can append these together to reconstruct the full original comment.
*/
if (params.length > expectedNumParams) {
const commentArray = [];
for (let i = expectedNumParams - 1; i < params.length; i++) {
commentArray.push(params[i]);
}
});
params.length = expectedNumParams;
params[expectedNumParams - 1] = commentArray.join(",");
}

csvData.push(params);
});

fs.createReadStream(pathToCsv).pipe(csvStream);
// Remove header
csvData.shift();

// Insert rows
csvData.forEach((params) => {
try {
client.querySync(queryString, params);
console.log(
`Inserted row from ${pathToCsv}: ${params[0]}, ${params[1]} ${params[2]}`
);
} catch (err) {
console.log(`Error during row insertion for ${pathToCsv}: `, err.stack);
console.log("Faulty row is ", params);
}
});
};

const seedTreeData = (pool) => {
const seedTreeData = (client) => {
const treeDataSources = ["oakland_trees_clean4.csv"];

// Note that this query string is specific to the Oakland data from Sierra Club.
@@ -93,12 +124,19 @@ const seedTreeData = (pool) => {
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22)`;

treeDataSources.forEach((sourcePath) =>
insertRows(sourcePath, insertOaklandIntoTreeDataQueryString, pool)
);
treeDataSources.forEach((sourcePath) => {
console.log("source", sourcePath);
insertRows(
sourcePath,
insertOaklandIntoTreeDataQueryString,
client,
22,
() => {}
);
});
};

const seedTreeHistory = (pool) => {
const seedTreeHistory = (client) => {
const treeHistorySources = [
"oakland_trees_maintenance_2019.csv",
"oakland_trees_maintenance_2020.csv",
@@ -107,10 +145,7 @@ const seedTreeHistory = (pool) => {
// Note that this query string is specific to the Oakland data from Sierra Club.
const insertOaklandIntoTreeHistoryQueryString = `
INSERT INTO treehistory(
lng,
lat,
common,
datevisit,
date_visit,
volunteer,
watered,
staked,
@@ -121,31 +156,40 @@ const seedTreeHistory = (pool) => {
comment,
id_tree
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12,
(SELECT id_tree FROM treedata WHERE treedata.lng = $1 AND treedata.lat = $2) -- lng + lat is the unique key
VALUES ($3, $4, $5, $6, $7, $8, $9, $10, $11,
(SELECT treedata.id_tree FROM treedata WHERE treedata.lng = $1 AND treedata.lat = $2 LIMIT 1) -- lng + lat is the unique key
);`;

treeHistorySources.forEach((sourcePath) =>
insertRows(sourcePath, insertOaklandIntoTreeHistoryQueryString, pool)
);
treeHistorySources.forEach((sourcePath) => {
console.log("source", sourcePath);

const modifyRows = (params) => {
// Remove the unused common_name param; the INSERT only binds 11 parameters
params.splice(2, 1);
};

insertRows(
sourcePath,
insertOaklandIntoTreeHistoryQueryString,
client,
11,
modifyRows
);
});
};

const main = () => {
// Create a new connection to the database
const pool = new Pool({
connectionLimit: 10,
database: "treedb",
user: "trees",
host: "localhost",
password: "trees",
port: 5432,
dateStrings: "date",
});

seedTreeData(pool);
seedTreeHistory(pool);

pool.end();
const client = new Client();
try {
client.connectSync(
"dbname=treedb user=trees host=localhost password=trees port=5432 connect_timeout=10"
);
} catch (err) {
throw err;
}

seedTreeData(client);
seedTreeHistory(client);
};

main();
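To make the second commit bullet concrete, here is a condensed, standalone restatement of the comment-reassembly logic from `insertRows` above, applied to the faulty row quoted in the script's own comment (a sketch for illustration only; the helper name is made up):

```js
// Hypothetical helper mirroring the comma-in-comment fix inside insertRows above.
// Columns past expectedNumParams are assumed to be fragments of the trailing comment field.
const rejoinTrailingComment = (params, expectedNumParams) => {
  if (params.length > expectedNumParams) {
    const comment = params.slice(expectedNumParams - 1).join(",");
    params.length = expectedNumParams;
    params[expectedNumParams - 1] = comment;
  }
  return params;
};

// The 13-column row from oakland_trees_maintenance_2019.csv quoted in the comment above,
// after modifyRows has spliced out the unused common-name column (12 values, 11 expected).
const faulty = [
  "-122.188109", "37.764347", "2019-07-15 11:00:00", "WB",
  "yes", "yes", "yes", "no", "no", "yes",
  '"Plants', ' owners mulched"',
];

console.log(rejoinTrailingComment(faulty, 11));
// The last element becomes '"Plants, owners mulched"', so the row binds to 11 parameters again.
```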
