Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

197 delete datasets #610

Merged
merged 10 commits into from
Sep 25, 2023
3 changes: 3 additions & 0 deletions lab/db.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ db.bind("batches");
db.bind("users");
db.bind("datasets");
db.bind("settings");
db.bind("chats");
db.bind("chatlogs");
db.bind("executions");

// Promisify all methods
Object.keys(mongoskin).forEach((key) => {
Expand Down
81 changes: 80 additions & 1 deletion lab/lab.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const assert = require("assert");
const openaiRouter = require('./routes/openai');
const chatapiRouter = require('./routes/chatapi');
const execapiRouter = require('./routes/execapi');
const { deleteFilesFromGridstore } = require('./labutils');

/***************
* Enums
Expand Down Expand Up @@ -601,6 +602,7 @@ app.get("/api/v1/:collection/:id", (req, res, next) => {

// Update existing entry
app.put("/api/v1/:collection/:id", jsonParser, (req, res, next) => {

delete req.body._id; // Delete ID (will not update otherwise)
req.collection.updateByIdAsync(req.params.id, {
$set: req.body
Expand All @@ -626,6 +628,84 @@ app.put("/api/v1/:collection/:id", jsonParser, (req, res, next) => {
});
});

/**
 * Delete a dataset together with the records derived from it.
 *
 * Looks the dataset up by id, then collects:
 *   - experiments that reference the dataset or its first file,
 *   - chats that reference the dataset or one of those experiments,
 *   - executions that reference the dataset, its first file, or one of those
 *     experiments.
 * The dataset, experiment, chatlog, chat and execution records are removed,
 * and every file attached to the dataset, experiments and executions is
 * unlinked from the GridStore.
 *
 * Responds with the per-collection removal results plus `fileCount`, or with
 * a `message` when no dataset has the given id. Any error is forwarded to the
 * Express error handler through `next`.
 */
app.delete('/api/v1/datasets/:id', async (req, res, next) => {
    const result = {};
    const files = [];
    try {
        const dataset_id = db.toObjectID(req.params.id);
        const dataset = await db.datasets.findByIdAsync(dataset_id);

        if (dataset == null) {
            return res.send({ message: 'dataset ' + req.params.id + ' not found'});
        }

        // A dataset without any stored file must still be deletable, so the
        // file-id filter is only added when a first file actually exists.
        const datasetFiles = dataset.files || [];
        files.push(...datasetFiles);

        const datasetFilters = [{"_dataset_id": {"$eq": dataset_id}}];
        if (datasetFiles.length > 0 && datasetFiles[0] != null && datasetFiles[0]._id != null) {
            const dataset_file_id = db.toObjectID(datasetFiles[0]._id);
            datasetFilters.push({"_dataset_file_id": {"$eq": dataset_file_id}});
        }

        // experiments
        const experiments = await db.experiments.find({ $or: datasetFilters }).toArrayAsync();
        const experimentIds = [];
        experiments.forEach(exp => {
            experimentIds.push(exp._id);
            files.push(...(exp.files || []));
        });

        // chats
        const chats = await db.chats.find({ $or: [
            {"_dataset_id": {"$eq": dataset_id}},
            {"_experiment_id": {"$in": experimentIds}}
        ]}).toArrayAsync();
        const chatIds = [];
        const chatlogIds = [];
        chats.forEach(chat => {
            chatIds.push(chat._id);
            chatlogIds.push(...(chat.chatlogs || []));
        });

        // executions
        const executions = await db.executions.find({ $or: [
            ...datasetFilters,
            {"_experiment_id": {"$in": experimentIds}}
        ]}).toArrayAsync();
        const executionIds = [];
        executions.forEach(exec => {
            executionIds.push(exec._id);
            files.push(...(exec.files || []));
        });

        // *** DELETE ***
        result.datasetCount = await db.datasets.removeByIdAsync(dataset_id);
        if (experiments.length > 0) {
            result.experimentCount = await db.experiments.removeAsync({'_id': { '$in': experimentIds }});
            console.log('experiment count');
        }
        if (chatIds.length > 0) {
            result.chatlogCount = (await db.chatlogs.removeAsync({'_id': { '$in': chatlogIds }}));
            console.log('chatlogs deleted');
            result.chatCount = (await db.chats.removeAsync({'_id': { '$in': chatIds }}));
            console.log('chats deleted');
        }
        // The executions' files are unlinked below, so the execution records
        // are removed as well; otherwise they would be left behind pointing at
        // files that no longer exist.
        if (executionIds.length > 0) {
            result.executionCount = await db.executions.removeAsync({'_id': { '$in': executionIds }});
        }
        result.fileCount = await deleteFilesFromGridstore(files);

        res.send(result);
    } catch (err) {
        next(err);
    }
});

// Delete existing entry
app.delete("/api/v1/:collection/:id", (req, res, next) => {
req.collection.removeByIdAsync(req.params.id)
Expand All @@ -643,7 +723,6 @@ app.delete("/api/v1/:collection/:id", (req, res, next) => {
});



// Experiment page
app.get("/api/v1/experiments/:id", (req, res, next) => {
db.experiments.findByIdAsync(req.params.id)
Expand Down
27 changes: 27 additions & 0 deletions lab/labutils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const db = require("./db").db;

/**
 * Unlink a list of files from the GridStore.
 *
 * @param {Array<{_id: *}>} files - File descriptors; only `_id` is read.
 *   `null`/`undefined` is treated as an empty list, and entries that are
 *   nullish or carry no `_id` are skipped.
 * @returns {Promise<number>} Number of files whose unlink completed.
 * @throws Rethrows the first unlink (or GridStore construction) failure after
 *   logging it.
 */
async function deleteFilesFromGridstore(files) {
    // Callers assemble this list from several collections, so tolerate a
    // missing list and malformed entries instead of failing with a TypeError.
    const ids = (files || [])
        .filter((file) => file != null && file._id != null)
        .map((file) => file._id);

    try {
        const unlinked = await Promise.all(ids.map((id) => {
            const gfs = new db.GridStore(db, id, 'w', {
                promiseLibrary: Promise
            });
            return gfs.unlinkAsync();
        }));
        // Promise.all only resolves when every unlink succeeded, so the
        // number of results is the number of deleted files.
        return unlinked.length;
    } catch (err) {
        console.error(err);
        throw err;
    }
}

module.exports = {
deleteFilesFromGridstore
}
97 changes: 0 additions & 97 deletions lab/routes/execapi.js
Original file line number Diff line number Diff line change
Expand Up @@ -120,103 +120,6 @@ router.post('/executions', async (req, res, next) => {
});


/**
 * Legacy code-execution endpoint.
 *
 * Creates an Execution record for `req.body.src_code`, optionally linked to a
 * dataset file, dataset and/or experiment, posts it to the first registered
 * machine's `/code/run` endpoint, stores the returned status and result plus
 * any files found in the run's temporary folder, and responds with the
 * execution.
 *
 * Responses:
 *   400 - `src_code` is missing, or no machine is registered.
 *   500 - any failure while saving, running or uploading.
 *
 * The temporary run folder is removed on every path once it has been created.
 */
router.post('/executions_old', async (req, res, next) => {
    if (req.body.src_code == null) {
        return res.status(400).json({ message: 'No src_code provided' });
    }

    // No chatlog_id is required here: the client is expected to take the
    // execution id returned by this endpoint and write it to the next chatlog.

    // create a new execution
    const execution = new Execution({
        src_code: req.body.src_code,
        status: 'submitted',
        result: null,
        files: []
    });

    if (req.body.experiment_id != null) {
        execution._experiment_id = req.body.experiment_id;
    }

    let tmppath = null;
    try {
        // Everything that can fail lives inside this try block: a rejected
        // await in an async route handler would otherwise leave the request
        // without a response.
        if (req.body.dataset_file_id != null) {
            execution._dataset_file_id = req.body.dataset_file_id;
        } else if (req.body.dataset_id != null) {
            execution._dataset_id = req.body.dataset_id;
            const dataset = await getDatasetById(req.body.dataset_id);
            if (dataset != null) {
                execution._dataset_file_id = dataset.files[0]._id;
            }
        }

        const newExecution = await execution.save();
        execution._id = newExecution._id;

        // make the tmp folder for this run if it is not available yet
        tmppath = path.join(process.env.CODE_RUN_PATH, execution._id.toString());
        if (!fs.existsSync(tmppath)) fs.mkdirSync(tmppath, { recursive: true });

        // find machines that could handle the project
        // this may need revision, submitting experiments checks the machine capacity
        // but the capacity is tied to each algorithm. Not sure how to handle this yet.
        const machines = await Machine.find({}, { address: 1 });
        if (machines.length === 0) {
            return res.status(400).json({ message: 'No machines available' });
        }

        // call the machine api
        let result = await fetch(machines[0].address + '/code/run', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(execution)
        });
        result = await result.json();

        // update the execution status
        execution.status = result.exec_results.status;
        execution.result = result.exec_results.result;

        // add any generated files in tmppath to the execution.files array
        execution.files = await uploadExecFiles(execution._id, tmppath);

        await execution.save();

        res.send(execution);
    } catch (err) {
        console.error(err);
        return res.status(500).json({ message: err.message });
    } finally {
        // delete the tmp folder, also when the run failed or no machine was
        // available, so failed runs do not accumulate folders on disk
        if (tmppath != null) {
            fs.rmdir(tmppath, { recursive: true }, (err) => {
                if (err) {
                    console.error(err);
                } else {
                    console.log(tmppath + ' folder deleted');
                }
            });
        }
    }
});


router.post('/executions/install', async (req, res, next) => {

if (req.body.command != 'install' && req.body.command != 'freeze') {
Expand Down
7 changes: 4 additions & 3 deletions tests/integration/jest/labApi.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
* API interface to a lab instance
*/

import { get, post, put, putFormData } from './util/apiHelper';
import { get, post, put, putFormData, deleteRecord } from './util/apiHelper';

/** Requests the `userdatasets` listing from the lab API through the `get` helper. */
export const fetchDatasets = () => {
  return get('http://lab:5080/api/userdatasets');
};

/**
 * Requests a single dataset from the lab API through the `get` helper.
 * @param id - dataset identifier interpolated into the URL path
 */
export const fetchDataset = (id) => {
  return get(`http://lab:5080/api/datasets/${id}`);
};
Expand Down Expand Up @@ -60,5 +60,6 @@ export const getAiStatus = (datasetId) => get(`http://lab:5080/api/userdatasets/
/**
 * Requests the `model` resource of an experiment through the `get` helper.
 * @param id - experiment identifier interpolated into the URL path
 */
export const fetchExperimentModel = (id) => {
  return get(`http://lab:5080/api/v1/experiments/${id}/model`);
};

/**
 * Requests the `script` resource of an experiment through the `get` helper.
 * @param id - experiment identifier interpolated into the URL path
 */
export const fetchExperimentScript = (id) => {
  return get(`http://lab:5080/api/v1/experiments/${id}/script`);
};

export const postCodeExecutions = (params) => post('http://lab:5080/execapi/v1/executions', params)
export const postPackageInstall = (params) => post('http://lab:5080/execapi/v1/executions/install', params)
/**
 * Sends `params` to the execapi `executions` endpoint through the `post` helper.
 * @param params - request payload handed to `post` unchanged
 */
export const postCodeExecutions = (params) => {
  return post('http://lab:5080/execapi/v1/executions', params);
};

/**
 * Sends `params` to the execapi `executions/install` endpoint through the `post` helper.
 * @param params - request payload handed to `post` unchanged
 */
export const postPackageInstall = (params) => {
  return post('http://lab:5080/execapi/v1/executions/install', params);
};

/**
 * Calls the `deleteRecord` helper on the dataset's `/api/v1/datasets/:id` URL.
 * @param id - dataset identifier interpolated into the URL path
 */
export const deleteDataset = (id) => {
  return deleteRecord(`http://lab:5080/api/v1/datasets/${id}`);
};
4 changes: 2 additions & 2 deletions tests/integration/jest/labApi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ describe('lab', () => {
form.append('_metadata', metadata)
form.append('_files', fs.createReadStream(filepath));

console.log('fs.createReadStream(filepath)',fs.createReadStream(filepath))
console.log(`form:`, form)
// console.log('fs.createReadStream(filepath)',fs.createReadStream(filepath))
// console.log(`form:`, form)

let result

Expand Down
Loading
Loading