From e308cc55cf4613f49fcc555c0b38712d5f9ef3e3 Mon Sep 17 00:00:00 2001 From: motoki317 Date: Fri, 22 Mar 2024 09:52:26 +0900 Subject: [PATCH] Implement executable plugin --- Makefile | 2 + README.md | 6 + backend.Dockerfile | 2 + backend/src/api/serve/refactorings.ts | 3 +- backend/src/api/tools/refdiff.ts | 7 +- backend/src/api/tools/rminer.ts | 7 +- backend/src/cmd/import.ts | 35 ---- backend/src/cmd/index.ts | 2 +- backend/src/cmd/jobRunner.ts | 4 +- backend/src/cmd/plugin-refdiff.ts | 11 ++ backend/src/cmd/plugin-rminer.ts | 11 ++ backend/src/config.ts | 152 +++++++++++++++--- backend/src/jobs/info.ts | 4 +- backend/src/jobs/process.ts | 32 ++-- backend/src/jobs/runner/refdiff.ts | 29 ---- backend/src/jobs/runner/rminer.ts | 29 ---- backend/src/mongo.ts | 2 +- .../refdiff.ts => plugins/refdiff-process.ts} | 10 +- backend/src/plugins/refdiff.ts | 29 ++++ .../rminer.ts => plugins/rminer-process.ts} | 10 +- backend/src/plugins/rminer.ts | 29 ++++ common/common.ts | 8 +- common/utils.ts | 9 ++ compose.yaml | 4 + docs/development.md | 52 ++++++ docs/openapi.yaml | 5 + example/plugins/example-process-plugin.sh | 19 +++ 27 files changed, 344 insertions(+), 169 deletions(-) delete mode 100644 backend/src/cmd/import.ts create mode 100644 backend/src/cmd/plugin-refdiff.ts create mode 100644 backend/src/cmd/plugin-rminer.ts delete mode 100644 backend/src/jobs/runner/refdiff.ts delete mode 100644 backend/src/jobs/runner/rminer.ts rename backend/src/{jobs/processor/refdiff.ts => plugins/refdiff-process.ts} (97%) create mode 100644 backend/src/plugins/refdiff.ts rename backend/src/{jobs/processor/rminer.ts => plugins/rminer-process.ts} (95%) create mode 100644 backend/src/plugins/rminer.ts create mode 100755 example/plugins/example-process-plugin.sh diff --git a/Makefile b/Makefile index d95358b..07815a0 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,5 @@ +.DEFAULT_GOAL := up + .PHONY: up up: docker compose --compatibility up -d --build diff --git a/README.md b/README.md 
index c211010..61f9827 100644 --- a/README.md +++ b/README.md @@ -365,6 +365,12 @@ You can automate data processing by directly calling API via `curl` or something Visit [/api-doc](http://localhost:8080/api-doc) from top-right of the UI, for API documentation in OpenAPI format. Equivalent curl commands for each API call are also obtainable from the swagger UI. +### (Advanced Usage) Using Plugins + +While RefSearch natively supports RefactoringMiner and RefDiff, you can bring your own program to detect more refactoring instances. + +See [./docs/development.md](./docs/development.md) for more. + ## Refactoring Types RefSearch uses the following tools under the hood to automatically detect refactorings inside repositories' commits. diff --git a/backend.Dockerfile b/backend.Dockerfile index eb8ea49..b06d844 100644 --- a/backend.Dockerfile +++ b/backend.Dockerfile @@ -42,6 +42,8 @@ COPY --from=builder /work/backend/out /work COPY package.json /work COPY backend/package.json /work/backend +RUN sh -c 'chmod +x ./backend/src/cmd/*.js' + # NOTE: "node pid 1 problem" ENTRYPOINT ["/sbin/tini", "--"] CMD ["node", "backend/src/cmd/jobRunner.js"] diff --git a/backend/src/api/serve/refactorings.ts b/backend/src/api/serve/refactorings.ts index 01556bd..86e4ecb 100644 --- a/backend/src/api/serve/refactorings.ts +++ b/backend/src/api/serve/refactorings.ts @@ -7,6 +7,7 @@ interface PostRequest extends Request { body: { repository: string commit: string + toolName: string refactorings: PureRefactoringMeta[] } @@ -33,7 +34,7 @@ export const postRefactoringsHandler = async (req: PostRequest, res: Response) = } // Insert - const insertRes = await transformAndInsertRefactorings(body.repository, body.commit, body.refactorings) + const insertRes = await transformAndInsertRefactorings(body.repository, body.commit, body.toolName, body.refactorings) return res.status(200).json({ message: `Inserted ${insertRes.insertedCount} document(s)`, diff --git a/backend/src/api/tools/refdiff.ts 
b/backend/src/api/tools/refdiff.ts index f4defae..70eaf15 100644 --- a/backend/src/api/tools/refdiff.ts +++ b/backend/src/api/tools/refdiff.ts @@ -4,12 +4,13 @@ import { URLSearchParams } from 'url' import { humanishName } from '../../utils.js' import { RefDiffRefactoring } from '../../../../common/refdiff.js' import { HTTPStatusError } from '../error.js' +import { memo } from '../../../../common/utils.js' -const baseUrl = `http://${config.tool.refDiff.host}:${config.tool.refDiff.port}/detect` +const baseUrl = memo(() => `http://${config().tool.refDiff.host}:${config().tool.refDiff.port}/detect`) export const detectRefDiffRefactorings = async (repoUrl: string, commit: string, timeoutSeconds: number): Promise => { - const json = await fetch(baseUrl + '?' + new URLSearchParams({ - dir: config.tool.refDiff.baseRepoPath + '/' + humanishName(repoUrl) + '/.git', + const json = await fetch(baseUrl() + '?' + new URLSearchParams({ + dir: config().tool.refDiff.baseRepoPath + '/' + humanishName(repoUrl) + '/.git', commit: commit, timeout: '' + timeoutSeconds, }).toString()) diff --git a/backend/src/api/tools/rminer.ts b/backend/src/api/tools/rminer.ts index 10ada67..43064f2 100644 --- a/backend/src/api/tools/rminer.ts +++ b/backend/src/api/tools/rminer.ts @@ -4,12 +4,13 @@ import { URLSearchParams } from 'url' import { humanishName } from '../../utils.js' import { RMRefactoring } from '../../../../common/rminer.js' import { HTTPStatusError } from '../error.js' +import { memo } from '../../../../common/utils.js' -const baseUrl = `http://${config.tool.rminer.host}:${config.tool.rminer.port}/detect` +const baseUrl = memo(() => `http://${config().tool.rminer.host}:${config().tool.rminer.port}/detect`) export const detectRMinerRefactorings = async (repoUrl: string, commit: string, timeoutSeconds: number): Promise => { - const json = await fetch(baseUrl + '?' 
+ new URLSearchParams({ - dir: config.tool.rminer.baseRepoPath + '/' + humanishName(repoUrl), + const json = await fetch(baseUrl() + '?' + new URLSearchParams({ + dir: config().tool.rminer.baseRepoPath + '/' + humanishName(repoUrl), commit: commit, timeout: '' + timeoutSeconds, }).toString()) diff --git a/backend/src/cmd/import.ts b/backend/src/cmd/import.ts deleted file mode 100644 index c5eeb86..0000000 --- a/backend/src/cmd/import.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { toolRawDataCol } from '../mongo.js' -import { ExportFormat, ToolRawData } from '../types.js' -import fs from 'fs' - -const main = async () => { - if (process.argv.length < 3) { - console.log(`Usage: node load.js path/to/data.json`) - process.exit(1) - } - - const filename = process.argv[2] - - const data = JSON.parse(fs.readFileSync(filename).toString()) as ExportFormat - if (!Array.isArray(data)) { - throw new Error('invalid data format') - } - console.log(`Read ${data.length} raw data from file ${filename}...`) - - const toolData = data.map((d): ToolRawData => ({ commit: d.sha1, tool: d.tool, data: d.refactorings })) - const res = await toolRawDataCol.bulkWrite(toolData.map((d) => ({ - replaceOne: { - filter: { commit: d.commit, tool: d.tool }, - replacement: d, - upsert: true, - } - }))) - if (!res.isOk()) { - throw new Error('failed to insert data') - } - - console.log(`Inserted ${res.insertedCount}, modified ${res.modifiedCount} documents, import raw data complete.`) -} - -main() - .then(() => process.exit(0)) diff --git a/backend/src/cmd/index.ts b/backend/src/cmd/index.ts index 053deab..2bbe7d0 100644 --- a/backend/src/cmd/index.ts +++ b/backend/src/cmd/index.ts @@ -10,7 +10,7 @@ const main = async () => { app.use(express.json()) registerRoutes(app) - app.listen(config.port, () => console.log(`API server started on port ${config.port}`)) + app.listen(config().port, () => console.log(`API server started on port ${config().port}`)) } main() diff --git a/backend/src/cmd/jobRunner.ts 
b/backend/src/cmd/jobRunner.ts index db1d020..92853d8 100644 --- a/backend/src/cmd/jobRunner.ts +++ b/backend/src/cmd/jobRunner.ts @@ -46,7 +46,7 @@ const reservedNextJob = async (): Promise => { // Find already reserved / running jobs (in case this job runner has restarted) const order: [keyof Job, 'asc' | 'desc'][] = [['queuedAt', 'asc']] const reserved = await readAllFromCursor( - jobWithData.find({ status: { $in: [JobStatus.Ready, JobStatus.Running] }, 'data.runnerId': config.runnerId }, { sort: order }), + jobWithData.find({ status: { $in: [JobStatus.Ready, JobStatus.Running] }, 'data.runnerId': config().runnerId }, { sort: order }), ) const running = reserved.find((j) => j.status === JobStatus.Running) if (running) return running @@ -64,7 +64,7 @@ const findNextJob = async (): Promise => { // Atomically find and reserve next pipeline const next = await jobDataCol.findOneAndUpdate({ runnerId: { $exists: false } - }, { $set: { runnerId: config.runnerId } }) + }, { $set: { runnerId: config().runnerId } }) if (next) { return reservedNextJob() } diff --git a/backend/src/cmd/plugin-refdiff.ts b/backend/src/cmd/plugin-refdiff.ts new file mode 100644 index 0000000..6cc4881 --- /dev/null +++ b/backend/src/cmd/plugin-refdiff.ts @@ -0,0 +1,11 @@ +#!/usr/bin/env node + +import { pluginRefDiffMain } from '../plugins/refdiff.js' + +if (process.argv.length < 4) { + throw new Error(`Expected at least 4 argv.length, got: ${JSON.stringify(process.argv)}`) +} + +pluginRefDiffMain(process.argv[2], process.argv[3]) + .then(res => console.log(JSON.stringify(res))) + .then(() => process.exit(0)) diff --git a/backend/src/cmd/plugin-rminer.ts b/backend/src/cmd/plugin-rminer.ts new file mode 100644 index 0000000..2fa0dac --- /dev/null +++ b/backend/src/cmd/plugin-rminer.ts @@ -0,0 +1,11 @@ +#!/usr/bin/env node + +import { pluginRMinerMain } from '../plugins/rminer.js' + +if (process.argv.length < 4) { + throw new Error(`Expected at least 4 argv.length, got: 
${JSON.stringify(process.argv)}`) +} + +pluginRMinerMain(process.argv[2], process.argv[3]) + .then(res => console.log(JSON.stringify(res))) + .then(() => process.exit(0)) diff --git a/backend/src/config.ts b/backend/src/config.ts index 656963d..d91e365 100644 --- a/backend/src/config.ts +++ b/backend/src/config.ts @@ -1,33 +1,135 @@ -export const config = { - port: Number.parseInt(process.env.PORT ?? '') || 3000, - db: { - user: process.env.MONGODB_USER || 'root', - password: process.env.MONGODB_PASSWORD || 'password', - host: process.env.MONGODB_HOST || 'localhost', - port: process.env.MONGODB_PORT || '27017', - }, - tool: { - rminer: { - host: process.env.RMINER_HOST || 'rminer', - port: process.env.RMINER_PORT || '3000', - baseRepoPath: process.env.RMINER_BASE_PATH || '/data/repos', +import { memo, unique } from '../../common/utils.js' +import path from 'path' +import { PureRefactoringMeta } from '../../common/common.js' +import { spawnSync } from 'node:child_process' +import { repoDirName } from './jobs/info.js' +import { pluginRMinerMain } from './plugins/rminer.js' +import { pluginRefDiffMain } from './plugins/refdiff.js' + +type ProcessPluginFunc = (repoUrl: string, commit: string) => Promise + +export class ProcessPlugin { + private readonly executable: string + + private override: ProcessPluginFunc | undefined + + constructor(executable: string) { + this.executable = executable + } + + public setOverride(f: ProcessPluginFunc): this { + this.override = f + return this + } + + public async run(repoUrl: string, commit: string): Promise { + if (this.override) return this.override(repoUrl, commit) + + const res = spawnSync(this.executable, [repoUrl, commit], { + cwd: repoDirName(repoUrl), + stdio: ['pipe', 'pipe', process.stderr], + }) + + // Check return code + if (res.status !== 0) { + if (res.error) console.trace(res.error) + return Promise.reject(`executable plugin process exited with code ${res.status}`) + } + + // Validate output + const out = 
JSON.parse(res.stdout.toString()) + if (!Array.isArray(out)) { + return Promise.reject(`plugin output an invalid json (not an array)`) + } + for (const refactoring of out) { + if (typeof refactoring.type !== 'string') { + return Promise.reject(`plugin output an invalid json ("type" string field is required)`) + } + if (typeof refactoring.description !== 'string') { + return Promise.reject(`plugin output an invalid json ("description" string field is required)`) + } + } + + return out + } +} + +export const config = memo(() => { + const c = { + port: Number.parseInt(process.env.PORT ?? '') || 3000, + db: { + user: process.env.MONGODB_USER || 'root', + password: process.env.MONGODB_PASSWORD || 'password', + host: process.env.MONGODB_HOST || 'localhost', + port: process.env.MONGODB_PORT || '27017', }, - refDiff: { - host: process.env.REFDIFF_HOST || 'refdiff', - port: process.env.REFDIFF_PORT || '3000', - baseRepoPath: process.env.REFDIFF_BASE_PATH || '/data/repos', + tool: { + plugins: {} as Record, + rminer: { + host: process.env.RMINER_HOST || 'rminer', + port: process.env.RMINER_PORT || '3000', + baseRepoPath: process.env.RMINER_BASE_PATH || '/data/repos', + }, + refDiff: { + host: process.env.REFDIFF_HOST || 'refdiff', + port: process.env.REFDIFF_PORT || '3000', + baseRepoPath: process.env.REFDIFF_BASE_PATH || '/data/repos', + }, }, - }, - runnerId: process.env.RUNNER_ID || '', - dataDir: process.env.DATA_DIR || '', -} as const + runnerId: process.env.RUNNER_ID || '', + dataDir: process.env.DATA_DIR || '', + } as const + + const readProcessPlugins = () => { + const processPluginEnvPrefix = 'PROCESS_PLUGIN_' + const processPlugins = unique( + Object.entries(process.env) + .filter(([key]) => key.startsWith(processPluginEnvPrefix)) + .map(([key]) => { + const envSuffix = key.substring(processPluginEnvPrefix.length) + return envSuffix.split('_')[0] + }), + ) + + // Built-in plugins + c.tool.plugins['RefactoringMiner'] = new ProcessPlugin( + 
path.join(import.meta.dirname, './cmd/plugin-rminer.js'), + ) + .setOverride(pluginRMinerMain) // Bypass process spawning to avoid mongo connection overheads + c.tool.plugins['RefDiff'] = new ProcessPlugin( + path.join(import.meta.dirname, './cmd/plugin-refdiff.js'), + ) + .setOverride(pluginRefDiffMain) + + for (const pluginPrefix of processPlugins) { + const prefix = processPluginEnvPrefix + pluginPrefix + '_' -export const validateRunnerConfig = () => { + const name = process.env[prefix + 'NAME'] || '' + const executable = process.env[prefix + 'EXECUTABLE'] || '' + + if (!name || !executable) { + console.warn(`Not all required environment variables are present for ${prefix} group, skipping plugin addition`) + continue + } + if (c.tool.plugins[name]) { + console.warn(`${prefix} group has conflicted name ${name}`) + continue + } + + c.tool.plugins[name] = new ProcessPlugin(executable) + } + } + readProcessPlugins() + + return c +}) + +export const validateRunnerConfig = memo(() => { const rules: [v: string, name: string, msg: string][] = [ - [config.runnerId, 'RUNNER_ID', 'Please set it to a unique value for each job runner.'], - [config.dataDir, 'DATA_DIR', 'Please set it to the path to data directory inside container.'], + [config().runnerId, 'RUNNER_ID', 'Please set it to a unique value for each job runner.'], + [config().dataDir, 'DATA_DIR', 'Please set it to the path to data directory inside container.'], ] for (const [v, name, msg] of rules) { if (!v) throw new Error(`Environment variable ${name} not set. 
${msg}`) } -} +}) diff --git a/backend/src/jobs/info.ts b/backend/src/jobs/info.ts index 193984b..bdc2a5d 100644 --- a/backend/src/jobs/info.ts +++ b/backend/src/jobs/info.ts @@ -3,8 +3,8 @@ import fs from 'fs' import path from 'path' import { config } from '../config.js' -export const repositoriesDir = (baseDir: string = config.dataDir) => path.resolve(baseDir, './repos') -export const repoDirName = (repoUrl: string, baseDir: string = config.dataDir): string => `${repositoriesDir(baseDir)}/${humanishName(repoUrl)}` +export const repositoriesDir = (baseDir: string = config().dataDir) => path.resolve(baseDir, './repos') +export const repoDirName = (repoUrl: string, baseDir: string = config().dataDir): string => `${repositoriesDir(baseDir)}/${humanishName(repoUrl)}` const makeDirIfNotExists = (dir: string) => { if (!fs.existsSync(dir)) { diff --git a/backend/src/jobs/process.ts b/backend/src/jobs/process.ts index 9391d75..29ad1f8 100644 --- a/backend/src/jobs/process.ts +++ b/backend/src/jobs/process.ts @@ -1,8 +1,6 @@ import { JobWithId } from '../jobs.js' import { commitsCol, refCol } from '../mongo.js' import { commitUrl, readAllFromCursor } from '../utils.js' -import { processRMiner, rminerToolName } from './runner/rminer.js' -import { processRefDiff, refDiffToolName } from './runner/refdiff.js' import { mergeCommitMetadataIntoRefactorings, updateCommitRefactoringMetadata, @@ -11,26 +9,24 @@ import { import { formatTime } from '../../../common/utils.js' import { commitPlaceholder, CommitProcessState, PureRefactoringMeta, RefactoringMeta } from '../../../common/common.js' import { JobData } from '../../../common/jobs.js' +import { config } from '../config.js' -type CommitId = string -type ToolName = string -type Processor = (repoUrl: string, commit: string) => Promise -const processors: Record = { - [rminerToolName]: processRMiner, - [refDiffToolName]: processRefDiff, -} +type Commit = string -const processCommit = async (repoUrl: string, commitId: CommitId, 
tools: Record, retryError: boolean) => { +const processCommit = async (repoUrl: string, commit: Commit, tools: Record, retryError: boolean) => { const newTools = Object.assign({}, tools) - let pureRefs: PureRefactoringMeta[] = [] - for (const [tool, process] of Object.entries(processors)) { + for (const [tool, plugin] of Object.entries(config().tool.plugins)) { const toProcess = !(tool in tools) || retryError && tools[tool] === CommitProcessState.NG if (!toProcess) continue try { const start = performance.now() - pureRefs.push(...(await process(repoUrl, commitId))) + + const pureRefs = await plugin.run(repoUrl, commit) + await updateCommitToolsMetadata(commit, newTools) + await transformAndInsertRefactorings(repoUrl, commit, tool, pureRefs) + newTools[tool] = CommitProcessState.OK console.log(` -> ${tool} in ${formatTime(start)}`) } catch (e) { @@ -39,18 +35,18 @@ const processCommit = async (repoUrl: string, commitId: CommitId, tools: Record< newTools[tool] = CommitProcessState.NG } } - - await updateCommitToolsMetadata(commitId, newTools) - await transformAndInsertRefactorings(repoUrl, commitId, pureRefs) } -export const transformAndInsertRefactorings = async (repoUrl: string, commit: string, pureRefs: PureRefactoringMeta[]): Promise<{ insertedCount: number }> => { +export const transformAndInsertRefactorings = async (repoUrl: string, commit: string, toolName: string, pureRefs: PureRefactoringMeta[]): Promise<{ insertedCount: number }> => { const refactorings = pureRefs.map((r): RefactoringMeta => { return { ...r, sha1: commit, repository: repoUrl, url: commitUrl(repoUrl, commit), + meta: { + tool: toolName, + }, commit: commitPlaceholder(), } }) @@ -74,7 +70,7 @@ export const processCommits = async (job: JobWithId, jobData: JobData) => { for (let i = 0; i < commits.length; i++) { const commit = commits[i] - const skip = Object.keys(processors).every((tool) => commit.tools[tool] === CommitProcessState.OK) + const skip = 
Object.keys(config().tool.plugins).every((tool) => commit.tools[tool] === CommitProcessState.OK) if (skip) continue console.log(`[${i + 1} / ${commits.length}] ${commit.id}`) diff --git a/backend/src/jobs/runner/refdiff.ts b/backend/src/jobs/runner/refdiff.ts deleted file mode 100644 index f24578c..0000000 --- a/backend/src/jobs/runner/refdiff.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { toolRawDataCol } from '../../mongo.js' -import { RefDiffRefactoring } from '../../../../common/refdiff.js' -import { processRefDiffOutput } from '../processor/refdiff.js' -import { detectRefDiffRefactorings } from '../../api/tools/refdiff.js' -import { PureRefactoringMeta } from '../../../../common/common' - -export const refDiffToolName = 'RefDiff' -const timeoutSeconds = 60 - -const getOrRun = async (repoUrl: string, commit: string): Promise => { - const rawData = await toolRawDataCol.findOne({ commit: commit, tool: refDiffToolName }) - if (rawData) return rawData.data as RefDiffRefactoring[] - - const refs = await detectRefDiffRefactorings(repoUrl, commit, timeoutSeconds) - - const insertRes = await toolRawDataCol.replaceOne( - { commit: commit, tool: refDiffToolName }, - { commit: commit, tool: refDiffToolName, data: refs }, - { upsert: true }, - ) - if (!insertRes.acknowledged) throw new Error('Failed to insert refdiff raw data') - - return refs -} - -export const processRefDiff = async (repoUrl: string, commit: string): Promise => { - const refs = await getOrRun(repoUrl, commit) - return processRefDiffOutput(refs) -} diff --git a/backend/src/jobs/runner/rminer.ts b/backend/src/jobs/runner/rminer.ts deleted file mode 100644 index cf2b13f..0000000 --- a/backend/src/jobs/runner/rminer.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { toolRawDataCol } from '../../mongo.js' -import { RMRefactoring } from '../../../../common/rminer.js' -import { processRMinerOutput } from '../processor/rminer.js' -import { detectRMinerRefactorings } from '../../api/tools/rminer.js' -import { 
PureRefactoringMeta } from '../../../../common/common' - -export const rminerToolName = 'RefactoringMiner' -const timeoutSeconds = 60 - -const getOrRun = async (repoUrl: string, commit: string): Promise => { - const rawData = await toolRawDataCol.findOne({ commit: commit, tool: rminerToolName }) - if (rawData) return rawData.data as RMRefactoring[] - - const refs = await detectRMinerRefactorings(repoUrl, commit, timeoutSeconds) - - const insertRes = await toolRawDataCol.replaceOne( - { commit: commit, tool: rminerToolName }, - { commit: commit, tool: rminerToolName, data: refs }, - { upsert: true }, - ) - if (!insertRes.acknowledged) throw new Error('Failed to insert rminer raw data') - - return refs -} - -export const processRMiner = async (repoUrl: string, commit: string): Promise => { - const refs = await getOrRun(repoUrl, commit) - return processRMinerOutput(refs) -} diff --git a/backend/src/mongo.ts b/backend/src/mongo.ts index 23ea807..e14f803 100644 --- a/backend/src/mongo.ts +++ b/backend/src/mongo.ts @@ -13,7 +13,7 @@ import { readAllFromCursor } from './utils.js' import { ToolRawData } from './types.js' import { config } from './config.js' -const env = config.db +const env = config().db const uri = `mongodb://${env.user}:${env.password}@${env.host}:${env.port}?retryWrites=true&w=majority` const client = new MongoClient(uri) diff --git a/backend/src/jobs/processor/refdiff.ts b/backend/src/plugins/refdiff-process.ts similarity index 97% rename from backend/src/jobs/processor/refdiff.ts rename to backend/src/plugins/refdiff-process.ts index 3f2436b..36b3341 100644 --- a/backend/src/jobs/processor/refdiff.ts +++ b/backend/src/plugins/refdiff-process.ts @@ -6,13 +6,12 @@ import { RefDiffNode, RefDiffNodeWithLines, RefDiffRefactoring, -} from '../../../../common/refdiff.js' +} from '../../../common/refdiff.js' import { PureRefactoringMeta, RefactoringType, RefactoringTypes, -} from '../../../../common/common.js' -import { refDiffToolName } from 
'../runner/refdiff.js' +} from '../../../common/common.js' type R = PureRefactoringMeta & ProcessedRefDiffRefactoring @@ -119,11 +118,6 @@ export const processRefDiffOutput = (refs: RefDiffRefactoring[]): R[] => { const ret: R = { type: typ, description, - - meta: { - tool: refDiffToolName, - }, - ...process(ref), } diff --git a/backend/src/plugins/refdiff.ts b/backend/src/plugins/refdiff.ts new file mode 100644 index 0000000..9fa0265 --- /dev/null +++ b/backend/src/plugins/refdiff.ts @@ -0,0 +1,29 @@ +import { RefDiffRefactoring } from '../../../common/refdiff.js' +import { toolRawDataCol } from '../mongo.js' +import { detectRefDiffRefactorings } from '../api/tools/refdiff.js' +import { PureRefactoringMeta } from '../../../common/common.js' +import { processRefDiffOutput } from './refdiff-process.js' + +const toolName = 'RefDiff' +const timeoutSeconds = 60 + +const getOrRun = async (repoUrl: string, commit: string): Promise => { + const rawData = await toolRawDataCol.findOne({ commit: commit, tool: toolName }) + if (rawData) return rawData.data as RefDiffRefactoring[] + + const refs = await detectRefDiffRefactorings(repoUrl, commit, timeoutSeconds) + + const insertRes = await toolRawDataCol.replaceOne( + { commit: commit, tool: toolName }, + { commit: commit, tool: toolName, data: refs }, + { upsert: true }, + ) + if (!insertRes.acknowledged) throw new Error('Failed to insert refdiff raw data') + + return refs +} + +export const pluginRefDiffMain = async (repoUrl: string, commit: string): Promise => { + const refs = await getOrRun(repoUrl, commit) + return processRefDiffOutput(refs) +} diff --git a/backend/src/jobs/processor/rminer.ts b/backend/src/plugins/rminer-process.ts similarity index 95% rename from backend/src/jobs/processor/rminer.ts rename to backend/src/plugins/rminer-process.ts index d997335..86ca564 100644 --- a/backend/src/jobs/processor/rminer.ts +++ b/backend/src/plugins/rminer-process.ts @@ -1,5 +1,5 @@ import equal from 'fast-deep-equal' -import 
{ PureRefactoringMeta, RefactoringMeta, RefactoringTypes } from '../../../../common/common.js' +import { PureRefactoringMeta, RefactoringMeta, RefactoringTypes } from '../../../common/common.js' import { CodeElementInfo, CodeElementsMap, @@ -7,8 +7,7 @@ import { RMLeftSideLocation, RMRefactoring, RMRefactoringType, -} from '../../../../common/rminer.js' -import { rminerToolName } from '../runner/rminer.js' +} from '../../../common/rminer.js' type R = PureRefactoringMeta & ProcessedRMRefactoring @@ -92,11 +91,6 @@ export const processRMinerOutput = (refs: RMRefactoring[]): R[] => { .map((r): R => ({ type: r.type, description: r.description, - - meta: { - tool: rminerToolName, - }, - ...process(r), })) diff --git a/backend/src/plugins/rminer.ts b/backend/src/plugins/rminer.ts new file mode 100644 index 0000000..48afb90 --- /dev/null +++ b/backend/src/plugins/rminer.ts @@ -0,0 +1,29 @@ +import { RMRefactoring } from '../../../common/rminer.js' +import { toolRawDataCol } from '../mongo.js' +import { detectRMinerRefactorings } from '../api/tools/rminer.js' +import { PureRefactoringMeta } from '../../../common/common.js' +import { processRMinerOutput } from './rminer-process.js' + +const toolName = 'RefactoringMiner' +const timeoutSeconds = 60 + +const getOrRun = async (repoUrl: string, commit: string): Promise => { + const rawData = await toolRawDataCol.findOne({ commit: commit, tool: toolName }) + if (rawData) return rawData.data as RMRefactoring[] + + const refs = await detectRMinerRefactorings(repoUrl, commit, timeoutSeconds) + + const insertRes = await toolRawDataCol.replaceOne( + { commit: commit, tool: toolName }, + { commit: commit, tool: toolName, data: refs }, + { upsert: true }, + ) + if (!insertRes.acknowledged) throw new Error('Failed to insert rminer raw data') + + return refs +} + +export const pluginRMinerMain = async (repoUrl: string, commit: string): Promise => { + const refs = await getOrRun(repoUrl, commit) + return processRMinerOutput(refs) +} diff 
--git a/common/common.ts b/common/common.ts index 14254b9..1d94da1 100644 --- a/common/common.ts +++ b/common/common.ts @@ -52,16 +52,16 @@ export type PureRefactoringMeta = { extractMethod?: ExtractMethodInfo rename?: RenameInfo - - meta: { - tool?: string - } } & Partial export type RefactoringMeta = PureRefactoringMeta & { sha1: string repository: string url: string + meta: { + tool?: string + } + commit: Omit // Merged from commits collection on insert } diff --git a/common/utils.ts b/common/utils.ts index a5bfbb9..4ccee71 100644 --- a/common/utils.ts +++ b/common/utils.ts @@ -26,3 +26,12 @@ export const gitHubRepoName = (url: string) => url.substring('https://github.com export const shortSha = (sha1: string) => sha1.substring(0, 7) export const unique = (arr: T[]): T[] => Array.from(new Set(arr)) + +export const memo = <T>(supplier: () => T): () => T => { /* runs supplier on the first call only; later calls return the cached value */ + let called = false + let value: T + return () => { + if (!called) { value = supplier(); called = true } /* flag is set only after supplier returns, so a throwing supplier is retried on the next call */ + return value + } +} diff --git a/compose.yaml b/compose.yaml index fea18fc..2bdc44a 100644 --- a/compose.yaml +++ b/compose.yaml @@ -94,8 +94,12 @@ services: REFDIFF_HOST: refdiff REFDIFF_PORT: 3000 REFDIFF_BASE_PATH: /data/repos + # Process plugin example + # PROCESS_PLUGIN_MYDETECTOR_NAME: "Awesome Refactoring Detector" + # PROCESS_PLUGIN_MYDETECTOR_EXECUTABLE: "/plugins/example-process-plugin.sh" volumes: - ./data:/data + - ./example/plugins:/plugins volumes: caddy_data: diff --git a/docs/development.md b/docs/development.md index ee7d978..41db2fc 100644 --- a/docs/development.md +++ b/docs/development.md @@ -1,5 +1,57 @@ # Development Guide +## Adding processor plugins + +You can add arbitrary processor plugin into the job pipeline, to detect extra refactorings with. 
+ +To configure a processor plugin, do the following: + +- Prepare an executable file inside the RefSearch 'runner' docker container, +- and provide the following environment variables to the RefSearch 'runner' process: + - `PROCESS_PLUGIN_{NAME}_NAME`: Plugin name. This will be set to the `meta.tool` field of detected refactorings. + - `PROCESS_PLUGIN_{NAME}_EXECUTABLE`: Plugin executable path. Details below. + - `{NAME}` inside the environment variable names must not have `_` in it. To add multiple plugins, change the `{NAME}`. + +For example, provide the following environment variables: + +- `PROCESS_PLUGIN_MYDETECTOR_NAME`: `Awesome Refactoring Detector` +- `PROCESS_PLUGIN_MYDETECTOR_EXECUTABLE`: `/plugins/example-process-plugin.sh` + - Make sure the RefSearch 'runner' can find this file. + +The RefSearch 'runner' component will invoke the executable file with: + +- Repository URL as the first argument, +- Commit SHA1 hash as the second argument, +- and the cloned repository as the current working directory. + +A plugin **must** output only a single JSON array to its stdout, with at least the following properties for each array element (a detected refactoring instance): + +```typescript +interface PureRefactoringMeta { + type: string + description: string +} +``` + +Each array element (a refactoring instance) may have more fields in order to ease the search. +Extra fields are ingested as-is into the database. + +An example plugin output would be: + +```json +[ + { + "type": "Extract Method", + "description": "Extracted method m1() from m2(String)", + "extractMethod": { + "sourceMethodLines": 10, + "extractedLines": 5, + "sourceMethodsCount": 2 + } + } +] +``` + ## Adding refactoring types To add refactoring types displayed in select box in the UI, add types to `RefactoringTypes` object in `./common/common.ts`. 
diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 64ad22e..e71a28c 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -515,6 +515,7 @@ components: required: - repository - commit + - toolName - refactorings properties: repository: @@ -525,6 +526,10 @@ components: type: string description: Commit SHA1 hash example: '6dc3fe36a9528389012e6eac9e90aeb30f300d33' + toolName: + type: string + description: Tool name used to detect refactoring with + example: 'RefactoringMiner' refactorings: type: array description: Refactorings diff --git a/example/plugins/example-process-plugin.sh b/example/plugins/example-process-plugin.sh new file mode 100755 index 0000000..448436b --- /dev/null +++ b/example/plugins/example-process-plugin.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env sh + +# This plugin always finds one static "refactoring instance" per commit. + +# Write to stderr to output some logs. +echo "Some logging output" 1>&2 + +# Output a JSON array to stdout. +echo '[ + { + "type": "Extract Method", + "description": "Extracted method m1() from m2(String)", + "extractMethod": { + "sourceMethodLines": 10, + "extractedLines": 5, + "sourceMethodsCount": 2 + } + } +]'