diff --git a/lab/db.js b/lab/db.js
index 765a008a6..99eda44b6 100644
--- a/lab/db.js
+++ b/lab/db.js
@@ -54,6 +54,9 @@ db.bind("batches");
 db.bind("users");
 db.bind("datasets");
 db.bind("settings");
+db.bind("chats");
+db.bind("chatlogs");
+db.bind("executions");
 
 // Promisify all methods
 Object.keys(mongoskin).forEach((key) => {
diff --git a/lab/lab.js b/lab/lab.js
index dfd8aa711..6823a24bb 100644
--- a/lab/lab.js
+++ b/lab/lab.js
@@ -54,6 +54,7 @@ const assert = require("assert");
 const openaiRouter = require('./routes/openai');
 const chatapiRouter = require('./routes/chatapi');
 const execapiRouter = require('./routes/execapi');
+const { deleteFilesFromGridstore } = require('./labutils');
 
 /***************
  * Enums
@@ -601,6 +602,7 @@ app.get("/api/v1/:collection/:id", (req, res, next) => {
 
 // Update existing entry
 app.put("/api/v1/:collection/:id", jsonParser, (req, res, next) => {
+
     delete req.body._id; // Delete ID (will not update otherwise)
     req.collection.updateByIdAsync(req.params.id, {
             $set: req.body
@@ -626,6 +628,82 @@ app.put("/api/v1/:collection/:id", jsonParser, (req, res, next) => {
         });
 });
 
+// Delete a dataset together with everything derived from it: experiments,
+// chats, chatlogs, executions and the GridStore files they reference.
+// Declared ahead of the generic "/api/v1/:collection/:id" delete route below.
+app.delete('/api/v1/datasets/:id', async (req, res, next) => {
+    try {
+        const dataset_id = db.toObjectID(req.params.id);
+        const dataset = await db.datasets.findByIdAsync(dataset_id);
+
+        if (dataset == null) {
+            return res.status(404).send({ message: 'dataset ' + req.params.id + ' not found'});
+        }
+
+        // A record without a `files`/`chatlogs` array is treated as having none.
+        const datasetFiles = dataset.files || [];
+        const files = [...datasetFiles];
+
+        // Dependent records point at the dataset by its id and/or by the id of
+        // its first file.
+        const datasetClauses = [{"_dataset_id": {"$eq": dataset_id}}];
+        if (datasetFiles.length > 0) {
+            const dataset_file_id = db.toObjectID(datasetFiles[0]._id);
+            datasetClauses.push({"_dataset_file_id": {"$eq": dataset_file_id}});
+        }
+
+        // experiments
+        const experiments = await db.experiments.find({ $or: datasetClauses }).toArrayAsync();
+        const experimentIds = experiments.map(exp => exp._id);
+        experiments.forEach(exp => {
+            files.push(...(exp.files || []));
+        });
+
+        // chats, and the chatlogs they own
+        const chats = await db.chats.find({ $or: [
+            {"_dataset_id": {"$eq": dataset_id}},
+            {"_experiment_id": {"$in": experimentIds}}
+        ]}).toArrayAsync();
+        const chatIds = chats.map(chat => chat._id);
+        const chatlogIds = [];
+        chats.forEach(chat => {
+            chatlogIds.push(...(chat.chatlogs || []));
+        });
+
+        // executions
+        const executions = await db.executions.find({ $or: [
+            ...datasetClauses,
+            {"_experiment_id": {"$in": experimentIds}}
+        ]}).toArrayAsync();
+        const executionIds = executions.map(exec => exec._id);
+        executions.forEach(exec => {
+            files.push(...(exec.files || []));
+        });
+
+        // *** DELETE ***
+        // Dependents are removed first and the dataset last: if a step fails,
+        // the dataset is still there, so the same request can be repeated.
+        const result = {};
+        result.fileCount = await deleteFilesFromGridstore(files);
+        if (chatIds.length > 0) {
+            result.chatlogCount = await db.chatlogs.removeAsync({'_id': { '$in': chatlogIds }});
+            result.chatCount = await db.chats.removeAsync({'_id': { '$in': chatIds }});
+        }
+        if (executionIds.length > 0) {
+            // Remove the execution records themselves, not only their files.
+            result.executionCount = await db.executions.removeAsync({'_id': { '$in': executionIds }});
+        }
+        if (experimentIds.length > 0) {
+            result.experimentCount = await db.experiments.removeAsync({'_id': { '$in': experimentIds }});
+        }
+        result.datasetCount = await db.datasets.removeByIdAsync(dataset_id);
+
+        res.send(result);
+    } catch (err) {
+        next(err);
+    }
+});
+
 // Delete existing entry
 app.delete("/api/v1/:collection/:id", (req, res, next) => {
     req.collection.removeByIdAsync(req.params.id)
@@ -643,7 +721,6 @@ app.delete("/api/v1/:collection/:id", (req, res, next) => {
 });
 
 
-
 // Experiment page
 app.get("/api/v1/experiments/:id", (req, res, next) => {
     db.experiments.findByIdAsync(req.params.id)
diff --git a/lab/labutils.js b/lab/labutils.js
new file mode 100644
index 000000000..72103c5da
--- /dev/null
+++ b/lab/labutils.js
@@ -0,0 +1,45 @@
+const db = require("./db").db;
+
+/**
+ * Delete a set of files from GridStore.
+ *
+ * Every unlink is left to settle before the outcome is reported, so one
+ * failure does not leave the remaining deletions running unobserved.
+ *
+ * @param {Array<Object>} files - file records; only `_id` is read. A missing
+ *   list and entries without an `_id` are skipped, and an `_id` listed more
+ *   than once is unlinked a single time.
+ * @returns {Promise<number>} how many files were unlinked.
+ * @throws the first unlink error, after every unlink has settled.
+ */
+async function deleteFilesFromGridstore(files) {
+    // Key by the string form so equal ids held in different objects collapse.
+    const idsByKey = new Map();
+    for (const file of files || []) {
+        if (file != null && file._id != null) {
+            idsByKey.set(String(file._id), file._id);
+        }
+    }
+
+    const outcomes = await Promise.all([...idsByKey.values()].map((id) => {
+        const gfs = new db.GridStore(db, id, 'w', {
+            promiseLibrary: Promise
+        });
+        // Turn a rejection into a value so Promise.all waits for every unlink.
+        return gfs.unlinkAsync().then(
+            () => ({ ok: true }),
+            (err) => ({ ok: false, err: err })
+        );
+    }));
+
+    const failed = outcomes.find((outcome) => !outcome.ok);
+    if (failed) {
+        console.log(failed.err);
+        throw failed.err;
+    }
+    return outcomes.length;
+}
+
+module.exports = {
+    deleteFilesFromGridstore
+};
diff --git a/lab/routes/execapi.js b/lab/routes/execapi.js
index 0ec0f6725..24bcf54b7 100644
--- a/lab/routes/execapi.js
+++ b/lab/routes/execapi.js
@@ -120,103 +120,6 @@ router.post('/executions', async (req, res, next) => {
 });
 
 
-router.post('/executions_old', async (req, res, next) => {
-    if (req.body.src_code == null) {
-        return res.status(400).json({ message: 'No src_code provided' });
-    }
-
-    // This should not be needed in the code run. The client should take the
-    // execution_id returned by this enpoint and write it to the next chatlog.
-    // // this will be the chatlog_id of the chatlog that is requesting this run.
-    // // the next chatlog will be the one with the results of this run. The client
-    // // will need to save the execution_id returned by this endpoint in the next
-    // // chatlog.
- // if (req.body.chatlog_id != null) { - // return res.status(400).json({ message: 'no chatlog_id provided' }); - // } - - // create a new execution - let execution = new Execution({ - src_code: req.body.src_code, - status: 'submitted', - result: null, - files: [] - }); - - if (req.body.dataset_file_id != null) { - execution._dataset_file_id = req.body.dataset_file_id; - } else if (req.body.dataset_id != null) { - execution._dataset_id = req.body.dataset_id; - let dataset = await getDatasetById(req.body.dataset_id); - if (dataset != null) { - execution._dataset_file_id = dataset.files[0]._id; - } - } - - if (req.body.experiment_id != null) { - execution._experiment_id = req.body.experiment_id; - } - - try { - const newExecution = await execution.save(); - execution._id = newExecution._id; - } catch (err) { - return res.status(500).json({ message: err.message }); - } - - // make folder if not available yet: - // let tmppath = path.join(process.env.CODE_RUN_PATH, request.execution_id.toString()); - let tmppath = path.join(process.env.CODE_RUN_PATH, execution._id.toString()); - // make tmp folder if it is not available - if (!fs.existsSync(tmppath)) fs.mkdirSync(tmppath, { recursive: true }); - - // find machines that could handle the project - // this may need revision, submitting experiments checks the machine capacity - // but the capacity is tied to each algorithm. Not sure how to handle this yet. 
- let machines; - try { - machines = await Machine.find({}, { address: 1 }); - if (machines.length == 0) { - return res.status(400).json({ message: 'No machines available' }); - } - // call the machine api - let result = await fetch(machines[0].address + '/code/run', { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify(execution) - }); - result = await result.json(); - - // update the execution status - execution.status = result.exec_results.status; - execution.result = result.exec_results.result; - - // add any generated files in tmppath to the execution.files array - const files = await uploadExecFiles(execution._id, tmppath); - execution.files = files; - - const updatedExecution = await execution.save(); - - // delete the tmp folder - fs.rmdir(tmppath, { recursive: true }, (err) => { - if (err) { - console.error(err); - } else { - console.log(tmppath + ' folder deleted'); - } - }); - - res.send(execution); - } - catch (err) { - console.error(err); - return res.status(500).json({ message: err.message }); - } -}); - - router.post('/executions/install', async (req, res, next) => { if (req.body.command != 'install' && req.body.command != 'freeze') { diff --git a/tests/integration/jest/labApi.js b/tests/integration/jest/labApi.js index ea5fd4449..d13cff5ec 100644 --- a/tests/integration/jest/labApi.js +++ b/tests/integration/jest/labApi.js @@ -32,7 +32,7 @@ along with this program. If not, see . 
* Api interfact to a lab instance */ -import { get, post, put, putFormData } from './util/apiHelper'; +import { get, post, put, putFormData, deleteRecord } from './util/apiHelper'; export const fetchDatasets = () => get('http://lab:5080/api/userdatasets'); export const fetchDataset = (id) => get(`http://lab:5080/api/datasets/${id}`); @@ -60,5 +60,6 @@ export const getAiStatus = (datasetId) => get(`http://lab:5080/api/userdatasets/ export const fetchExperimentModel = (id) => get(`http://lab:5080/api/v1/experiments/${id}/model`); export const fetchExperimentScript = (id) => get(`http://lab:5080/api/v1/experiments/${id}/script`); -export const postCodeExecutions = (params) => post('http://lab:5080/execapi/v1/executions', params) -export const postPackageInstall = (params) => post('http://lab:5080/execapi/v1/executions/install', params) +export const postCodeExecutions = (params) => post('http://lab:5080/execapi/v1/executions', params); +export const postPackageInstall = (params) => post('http://lab:5080/execapi/v1/executions/install', params); +export const deleteDataset = (id) => deleteRecord(`http://lab:5080/api/v1/datasets/${id}`); diff --git a/tests/integration/jest/labApi.test.ts b/tests/integration/jest/labApi.test.ts index 91051f1ec..25cd0162e 100644 --- a/tests/integration/jest/labApi.test.ts +++ b/tests/integration/jest/labApi.test.ts @@ -39,8 +39,8 @@ describe('lab', () => { form.append('_metadata', metadata) form.append('_files', fs.createReadStream(filepath)); - console.log('fs.createReadStream(filepath)',fs.createReadStream(filepath)) - console.log(`form:`, form) + // console.log('fs.createReadStream(filepath)',fs.createReadStream(filepath)) + // console.log(`form:`, form) let result diff --git a/tests/integration/jest/machineApi.test.ts b/tests/integration/jest/machineApi.test.ts index 453c8808f..7fd2d74f5 100644 --- a/tests/integration/jest/machineApi.test.ts +++ b/tests/integration/jest/machineApi.test.ts @@ -152,6 +152,8 @@ describe('machine', () => { 
expect(labCodeRun._dataset_id).toEqual(dataset_id) }); + let dataset_result; + it('Test code run API endpoint recognizes model.', async () => { jest.setTimeout(util.JEST_TIMEOUT*10) @@ -176,21 +178,22 @@ describe('machine', () => { console.log('form:', form); - let result; + // let result; try { - result = await labApi.putDataset(form); - console.log('result:'); - console.log(result); + // result = await labApi.putDataset(form); + dataset_result = await labApi.putDataset(form); + console.log('dataset_result:'); + console.log(dataset_result); } catch (e) { var json = await e.response.json() expect(json.error).toBeUndefined() expect(e).toBeUndefined() } - expect(result).toHaveProperty('dataset_id'); + expect(dataset_result).toHaveProperty('dataset_id'); - let dataset_id = result.dataset_id; + let dataset_id = dataset_result.dataset_id; let algoName = 'LogisticRegression' let algoParams = { @@ -265,6 +268,111 @@ describe('machine', () => { expect(labCodeRun.result).toMatch(new RegExp(`^${algoName}?`)) }); + + it('Test that a dataset is deleted correctly', async () => { + // dataset_result is reused from the previous test + expect(dataset_result).toBeTruthy(); + expect(dataset_result).toHaveProperty('dataset_id'); + let delete_dataset_result = await labApi.deleteDataset(dataset_result.dataset_id); + console.log('delete_dataset_result', delete_dataset_result); + expect(delete_dataset_result).toBeTruthy(); + expect(delete_dataset_result).toHaveProperty('datasetCount'); + expect(delete_dataset_result.datasetCount).toEqual(1); + expect(delete_dataset_result).toHaveProperty('experimentCount'); + expect(delete_dataset_result.experimentCount).toBeTruthy(); + }); + + it('Test that an experiment can run after deleting a dataset', async () => { + jest.setTimeout(util.JEST_TIMEOUT*10) + + // upload a test dataset + let filename = 'bananamodel.csv' + let filepath = `${util.DATASET_PATH}/${filename}` + let form = new FormData(); + let metadata = JSON.stringify({ + 'name': filename, 
+ 'username': 'testuser', + 'timestamp': Date.now(), + 'dependent_col': 'class', + 'prediction_type': 'classification', + 'categorical_features': [], + 'ordinal_features': [] + }) + + console.log(metadata) + form.append('_metadata', metadata) + form.append('_files', fs.createReadStream(filepath)); + + console.log('form:', form); + + // let result; + + try { + // result = await labApi.putDataset(form); + dataset_result = await labApi.putDataset(form); + console.log('dataset_result:'); + console.log(dataset_result); + } catch (e) { + var json = await e.response.json() + expect(json.error).toBeUndefined() + expect(e).toBeUndefined() + } + + expect(dataset_result).toHaveProperty('dataset_id'); + + let dataset_id = dataset_result.dataset_id; + + let algoName = 'LogisticRegression' + let algoParams = { + "penalty": "l1", + "C": 1.0, + "dual": false, + "dataset": dataset_id + }; + + // get algorithms + var algorithms = await labApi.fetchAlgorithms(); + expect(algorithms.length).toBeGreaterThanOrEqual(util.MIN_EXPECTED_LAB_ALGO_COUNT); + var algoId = algorithms.find(function(element) { return element.name == algoName; })._id; + expect(algoId).toBeTruthy(); + + // submit a simple experiment + try { + var submitResult = await labApi.submitExperiment(algoId, algoParams); + } catch (e) { + console.log("submit experiment exception") + var json = await e.response.json(); + expect(json).toBeFalsy(); + expect(e).toBeFalsy(); + } + + expect(submitResult).toBeTruthy(); + + // expect that the experiment started running + var experimentResult = await labApi.fetchExperiment(submitResult._id) + //console.log("experimentResults: ", experimentResults) + expect(experimentResult._status).toBeTruthy() + expect(experimentResult._status).toEqual('running') + expect(experimentResult._prediction_type).toEqual('classification') + + // wait for the experiment to finish running, probably a better way to do this then delay... 
+ var count = 0 + console.log("starting timeout...") + // while (experimentResult._status === ('running') && count < 10) { + while (experimentResult._status === ('running') && count < 30) { + util.delay(10000) + count = count + 1 + experimentResult = await labApi.fetchExperiment(experimentResult._id) + console.log("experimentResult._status, count (" + count + "): ", experimentResult._status) + } + console.log("finished timeout...") + + // check that the expected results are there + //console.log("experimentResult: ", experimentResult) + expect(experimentResult._status).toBeTruthy() + expect(experimentResult._status).toEqual('success') + }) + it('Test the package install API endpoint with good package.', async () => { var labCodeInstall = await labApi.postPackageInstall({ command: 'install', packages: ['numpy'] }) expect(labCodeInstall.exec_results.code).toEqual(0) diff --git a/tests/integration/jest/util/apiHelper.js b/tests/integration/jest/util/apiHelper.js index e46bfcd8f..7989714d6 100644 --- a/tests/integration/jest/util/apiHelper.js +++ b/tests/integration/jest/util/apiHelper.js @@ -130,6 +130,25 @@ export const putFormData = (route, form) => { .then(json => json); }; +export const deleteRecord = (route) => { + let myHeaders = new Headers(); + myHeaders.append('Content-Type', 'application/json'); + + return fetch(route, { + method: 'DELETE', + headers: myHeaders, + mode: 'cors', + cache: 'default' + }).then(checkStatus) + .then(response => { + return response.json(); + }) + .catch((err) => { + throw(err); + }) + .then(json => json); +}; + function checkStatus(response) { if (response.status >= 400) {