diff --git a/.cspell.json b/.cspell.json index c78b966..43040b8 100644 --- a/.cspell.json +++ b/.cspell.json @@ -26,7 +26,8 @@ "voyageai", "vectordump", "payloadobject", - "markdownit" + "markdownit", + "plpgsql" ], "dictionaries": ["typescript", "node", "software-terms"], "import": ["@cspell/dict-typescript/cspell-ext.json", "@cspell/dict-node/cspell-ext.json", "@cspell/dict-software-terms"], diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 063d8a9..5f30821 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -3,14 +3,10 @@ import { SuperSupabase } from "./supabase"; import { Context } from "../../../types/context"; import { markdownToPlainText } from "../../utils/markdown-to-plaintext"; -export interface IssueType { - id: string; - markdown?: string; - author_id: number; - created_at: string; - modified_at: string; - payloadObject: Record | null; - embedding: number[]; +export interface IssueSimilaritySearchResult { + issue_id: string; + issue_plaintext: string; + similarity: number; } export class Issues extends SuperSupabase { @@ -70,15 +66,13 @@ export class Issues extends SuperSupabase { } } - async findSimilarIssues(markdown: string, threshold: number): Promise { + async findSimilarIssues(markdown: string, threshold: number, currentId: string): Promise { const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - const { data, error } = await this.supabase - .from("issues") - .select("*") - .eq("type", "issue") - .textSearch("embedding", embedding.join(",")) - .order("embedding", { foreignTable: "issues", ascending: false }) - .lte("embedding", threshold); + const { data, error } = await this.supabase.rpc("find_similar_issues", { + current_id: currentId, + query_embedding: embedding, + threshold: threshold, + }); if (error) { this.context.logger.error("Error finding similar issues", error); return []; diff --git 
a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts new file mode 100644 index 0000000..2378a1e --- /dev/null +++ b/src/handlers/issue-deduplication.ts @@ -0,0 +1,111 @@ +import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues"; +import { Context } from "../types"; +import { IssuePayload } from "../types/payload"; + +export interface IssueGraphqlResponse { + node: { + title: string; + url: string; + }; + similarity: string; +} + +/** + * Check if an issue is similar to any existing issues in the database + * @param context + * @returns true if the issue is similar to an existing issue, false otherwise + */ +export async function issueChecker(context: Context): Promise<boolean> { + const { + logger, + adapters: { supabase }, + octokit, + } = context; + const { payload } = context as { payload: IssuePayload }; + const issue = payload.issue; + const issueContent = issue.body + issue.title; + + // Fetch all similar issues based on settings.warningThreshold + const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id); + logger.debug("Found similar issues", { similarIssues }); + if (similarIssues && similarIssues.length > 0) { + const matchIssues = similarIssues.filter((issue) => issue.similarity >= context.config.matchThreshold); + + // Handle issues that match the MATCH_THRESHOLD (Very Similar) + if (matchIssues.length > 0) { + logger.info(`Similar issue which matches more than ${context.config.matchThreshold} already exists`); + await octokit.issues.update({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: issue.number, + state: "closed", + state_reason: "not_planned", + }); + } + + // Handle all issues that exceed settings.warningThreshold (including any matches closed above) + if (similarIssues.length > 0) { + logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`); + await handleSimilarIssuesComment(context,
payload, issue.number, similarIssues); + return true; + } + } + + return false; +} + +/** + * Handle commenting on an issue with similar issues information + * @param context + * @param payload + * @param issueNumber + * @param similarIssues + */ +async function handleSimilarIssuesComment(context: Context, payload: IssuePayload, issueNumber: number, similarIssues: IssueSimilaritySearchResult[]) { + const issueList: IssueGraphqlResponse[] = await Promise.all( + similarIssues.map(async (issue: IssueSimilaritySearchResult) => { + const issueUrl: IssueGraphqlResponse = await context.octokit.graphql( + `query($issueNodeId: ID!) { + node(id: $issueNodeId) { + ... on Issue { + title + url + } + } + }`, + { issueNodeId: issue.issue_id } + ); + issueUrl.similarity = (issue.similarity * 100).toFixed(2); + return issueUrl; + }) + ); + + const commentBody = issueList.map((issue) => `- [${issue.node.title}](${issue.node.url}) Similarity: ${issue.similarity}`).join("\n"); + const body = `This issue seems to be similar to the following issue(s):\n\n${commentBody}`; + + const existingComments = await context.octokit.issues.listComments({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: issueNumber, + }); + + const existingComment = existingComments.data.find( + (comment) => comment.body && comment.body.includes("This issue seems to be similar to the following issue(s)") + ); + + if (existingComment) { + await context.octokit.issues.updateComment({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + comment_id: existingComment.id, + body: body, + }); + } else { + await context.octokit.issues.createComment({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: issueNumber, + body: body, + }); + } +} diff --git a/src/plugin.ts b/src/plugin.ts index 2f7dad8..0d0876c 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -13,6 +13,7 @@ import { VoyageAIClient } from "voyageai"; import { 
deleteIssues } from "./handlers/delete-issue"; import { addIssue } from "./handlers/add-issue"; import { updateIssue } from "./handlers/update-issue"; +import { issueChecker } from "./handlers/issue-deduplication"; /** * The main plugin function. Split for easier testing. @@ -31,11 +32,13 @@ export async function runPlugin(context: Context) { } else if (isIssueEvent(context)) { switch (eventName) { case "issues.opened": + await issueChecker(context); return await addIssue(context); - case "issues.deleted": - return await deleteIssues(context); case "issues.edited": + await issueChecker(context); return await updateIssue(context); + case "issues.deleted": + return await deleteIssues(context); } } else { logger.error(`Unsupported event: ${eventName}`); diff --git a/src/types/database.ts b/src/types/database.ts index 3622f52..f741570 100644 --- a/src/types/database.ts +++ b/src/types/database.ts @@ -1,31 +1,6 @@ export type Json = string | number | boolean | null | { [key: string]: Json | undefined } | Json[]; export type Database = { - graphql_public: { - Tables: { - [_ in never]: never; - }; - Views: { - [_ in never]: never; - }; - Functions: { - graphql: { - Args: { - operationName?: string; - query?: string; - variables?: Json; - extensions?: Json; - }; - Returns: Json; - }; - }; - Enums: { - [_ in never]: never; - }; - CompositeTypes: { - [_ in never]: never; - }; - }; public: { Tables: { issue_comments: { @@ -240,321 +215,6 @@ export type Database = { [_ in never]: never; }; }; - storage: { - Tables: { - buckets: { - Row: { - allowed_mime_types: string[] | null; - avif_autodetection: boolean | null; - created_at: string | null; - file_size_limit: number | null; - id: string; - name: string; - owner: string | null; - owner_id: string | null; - public: boolean | null; - updated_at: string | null; - }; - Insert: { - allowed_mime_types?: string[] | null; - avif_autodetection?: boolean | null; - created_at?: string | null; - file_size_limit?: number | null; - id: 
string; - name: string; - owner?: string | null; - owner_id?: string | null; - public?: boolean | null; - updated_at?: string | null; - }; - Update: { - allowed_mime_types?: string[] | null; - avif_autodetection?: boolean | null; - created_at?: string | null; - file_size_limit?: number | null; - id?: string; - name?: string; - owner?: string | null; - owner_id?: string | null; - public?: boolean | null; - updated_at?: string | null; - }; - Relationships: []; - }; - migrations: { - Row: { - executed_at: string | null; - hash: string; - id: number; - name: string; - }; - Insert: { - executed_at?: string | null; - hash: string; - id: number; - name: string; - }; - Update: { - executed_at?: string | null; - hash?: string; - id?: number; - name?: string; - }; - Relationships: []; - }; - objects: { - Row: { - bucket_id: string | null; - created_at: string | null; - id: string; - last_accessed_at: string | null; - metadata: Json | null; - name: string | null; - owner: string | null; - owner_id: string | null; - path_tokens: string[] | null; - updated_at: string | null; - user_metadata: Json | null; - version: string | null; - }; - Insert: { - bucket_id?: string | null; - created_at?: string | null; - id?: string; - last_accessed_at?: string | null; - metadata?: Json | null; - name?: string | null; - owner?: string | null; - owner_id?: string | null; - path_tokens?: string[] | null; - updated_at?: string | null; - user_metadata?: Json | null; - version?: string | null; - }; - Update: { - bucket_id?: string | null; - created_at?: string | null; - id?: string; - last_accessed_at?: string | null; - metadata?: Json | null; - name?: string | null; - owner?: string | null; - owner_id?: string | null; - path_tokens?: string[] | null; - updated_at?: string | null; - user_metadata?: Json | null; - version?: string | null; - }; - Relationships: [ - { - foreignKeyName: "objects_bucketId_fkey"; - columns: ["bucket_id"]; - isOneToOne: false; - referencedRelation: "buckets"; - 
referencedColumns: ["id"]; - }, - ]; - }; - s3_multipart_uploads: { - Row: { - bucket_id: string; - created_at: string; - id: string; - in_progress_size: number; - key: string; - owner_id: string | null; - upload_signature: string; - user_metadata: Json | null; - version: string; - }; - Insert: { - bucket_id: string; - created_at?: string; - id: string; - in_progress_size?: number; - key: string; - owner_id?: string | null; - upload_signature: string; - user_metadata?: Json | null; - version: string; - }; - Update: { - bucket_id?: string; - created_at?: string; - id?: string; - in_progress_size?: number; - key?: string; - owner_id?: string | null; - upload_signature?: string; - user_metadata?: Json | null; - version?: string; - }; - Relationships: [ - { - foreignKeyName: "s3_multipart_uploads_bucket_id_fkey"; - columns: ["bucket_id"]; - isOneToOne: false; - referencedRelation: "buckets"; - referencedColumns: ["id"]; - }, - ]; - }; - s3_multipart_uploads_parts: { - Row: { - bucket_id: string; - created_at: string; - etag: string; - id: string; - key: string; - owner_id: string | null; - part_number: number; - size: number; - upload_id: string; - version: string; - }; - Insert: { - bucket_id: string; - created_at?: string; - etag: string; - id?: string; - key: string; - owner_id?: string | null; - part_number: number; - size?: number; - upload_id: string; - version: string; - }; - Update: { - bucket_id?: string; - created_at?: string; - etag?: string; - id?: string; - key?: string; - owner_id?: string | null; - part_number?: number; - size?: number; - upload_id?: string; - version?: string; - }; - Relationships: [ - { - foreignKeyName: "s3_multipart_uploads_parts_bucket_id_fkey"; - columns: ["bucket_id"]; - isOneToOne: false; - referencedRelation: "buckets"; - referencedColumns: ["id"]; - }, - { - foreignKeyName: "s3_multipart_uploads_parts_upload_id_fkey"; - columns: ["upload_id"]; - isOneToOne: false; - referencedRelation: "s3_multipart_uploads"; - 
referencedColumns: ["id"]; - }, - ]; - }; - }; - Views: { - [_ in never]: never; - }; - Functions: { - can_insert_object: { - Args: { - bucketid: string; - name: string; - owner: string; - metadata: Json; - }; - Returns: undefined; - }; - extension: { - Args: { - name: string; - }; - Returns: string; - }; - filename: { - Args: { - name: string; - }; - Returns: string; - }; - foldername: { - Args: { - name: string; - }; - Returns: string[]; - }; - get_size_by_bucket: { - Args: Record; - Returns: { - size: number; - bucket_id: string; - }[]; - }; - list_multipart_uploads_with_delimiter: { - Args: { - bucket_id: string; - prefix_param: string; - delimiter_param: string; - max_keys?: number; - next_key_token?: string; - next_upload_token?: string; - }; - Returns: { - key: string; - id: string; - created_at: string; - }[]; - }; - list_objects_with_delimiter: { - Args: { - bucket_id: string; - prefix_param: string; - delimiter_param: string; - max_keys?: number; - start_after?: string; - next_token?: string; - }; - Returns: { - name: string; - id: string; - metadata: Json; - updated_at: string; - }[]; - }; - operation: { - Args: Record; - Returns: string; - }; - search: { - Args: { - prefix: string; - bucketname: string; - limits?: number; - levels?: number; - offsets?: number; - search?: string; - sortcolumn?: string; - sortorder?: string; - }; - Returns: { - name: string; - id: string; - updated_at: string; - created_at: string; - last_accessed_at: string; - metadata: Json; - }[]; - }; - }; - Enums: { - [_ in never]: never; - }; - CompositeTypes: { - [_ in never]: never; - }; - }; }; type PublicSchema = Database[Extract]; diff --git a/src/types/plugin-inputs.ts b/src/types/plugin-inputs.ts index 77a40f9..a942db2 100644 --- a/src/types/plugin-inputs.ts +++ b/src/types/plugin-inputs.ts @@ -18,7 +18,13 @@ export interface PluginInputs query_embedding) AS similarity + FROM issues + WHERE id <> current_id + AND 1 - (embedding <=> query_embedding) >= threshold + ORDER BY 
similarity DESC; +END; +$$ LANGUAGE plpgsql; diff --git a/tests/main.test.ts b/tests/main.test.ts index 40eeb7f..a8c14b8 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -163,7 +163,10 @@ function createContextInner( installation: { id: 1 } as Context["payload"]["installation"], organization: { login: STRINGS.USER_1 } as Context["payload"]["organization"], } as Context["payload"], - config: {}, + config: { + warningThreshold: 0.75, + matchThreshold: 0.95, + }, adapters: {} as Context["adapters"], logger: new Logs("debug"), env: {} as Env,