diff --git a/.github/workflows/dart.yml b/.github/workflows/dart.yml index bf476175..50a6bbd2 100644 --- a/.github/workflows/dart.yml +++ b/.github/workflows/dart.yml @@ -89,16 +89,16 @@ jobs: needs: - job_001 job_003: - name: "analyze_and_format; Dart 3.4.0; PKG: pkgs/dart_flutter_team_lints; `dart analyze --fatal-infos .`" + name: "analyze_and_format; Dart 3.4.0; PKGS: pkgs/dart_flutter_team_lints, pkgs/sdk_triage_bot; `dart analyze --fatal-infos .`" runs-on: ubuntu-latest steps: - name: Cache Pub hosted dependencies uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 with: path: "~/.pub-cache/hosted" - key: "os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0;packages:pkgs/dart_flutter_team_lints;commands:analyze" + key: "os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0;packages:pkgs/dart_flutter_team_lints-pkgs/sdk_triage_bot;commands:analyze" restore-keys: | - os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0;packages:pkgs/dart_flutter_team_lints + os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0;packages:pkgs/dart_flutter_team_lints-pkgs/sdk_triage_bot os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0 os:ubuntu-latest;pub-cache-hosted os:ubuntu-latest @@ -118,19 +118,28 @@ jobs: run: dart analyze --fatal-infos . if: "always() && steps.pkgs_dart_flutter_team_lints_pub_upgrade.conclusion == 'success'" working-directory: pkgs/dart_flutter_team_lints + - id: pkgs_sdk_triage_bot_pub_upgrade + name: pkgs/sdk_triage_bot; dart pub upgrade + run: dart pub upgrade + if: "always() && steps.checkout.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + - name: "pkgs/sdk_triage_bot; dart analyze --fatal-infos ." + run: dart analyze --fatal-infos . 
+ if: "always() && steps.pkgs_sdk_triage_bot_pub_upgrade.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot needs: - job_001 job_004: - name: "analyze_and_format; Dart dev; PKGS: pkgs/corpus, pkgs/dart_flutter_team_lints, pkgs/firehose, pkgs/repo_manage; `dart analyze --fatal-infos .`" + name: "analyze_and_format; Dart dev; PKGS: pkgs/corpus, pkgs/dart_flutter_team_lints, pkgs/firehose, pkgs/repo_manage, pkgs/sdk_triage_bot; `dart analyze --fatal-infos .`" runs-on: ubuntu-latest steps: - name: Cache Pub hosted dependencies uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 with: path: "~/.pub-cache/hosted" - key: "os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage;commands:analyze" + key: "os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage-pkgs/sdk_triage_bot;commands:analyze" restore-keys: | - os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage + os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage-pkgs/sdk_triage_bot os:ubuntu-latest;pub-cache-hosted;sdk:dev os:ubuntu-latest;pub-cache-hosted os:ubuntu-latest @@ -177,19 +186,28 @@ jobs: run: dart analyze --fatal-infos . if: "always() && steps.pkgs_repo_manage_pub_upgrade.conclusion == 'success'" working-directory: pkgs/repo_manage + - id: pkgs_sdk_triage_bot_pub_upgrade + name: pkgs/sdk_triage_bot; dart pub upgrade + run: dart pub upgrade + if: "always() && steps.checkout.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + - name: "pkgs/sdk_triage_bot; dart analyze --fatal-infos ." + run: dart analyze --fatal-infos . 
+ if: "always() && steps.pkgs_sdk_triage_bot_pub_upgrade.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot needs: - job_001 job_005: - name: "analyze_and_format; Dart dev; PKGS: pkgs/corpus, pkgs/dart_flutter_team_lints, pkgs/firehose, pkgs/repo_manage; `dart format --output=none --set-exit-if-changed .`" + name: "analyze_and_format; Dart dev; PKGS: pkgs/corpus, pkgs/dart_flutter_team_lints, pkgs/firehose, pkgs/repo_manage, pkgs/sdk_triage_bot; `dart format --output=none --set-exit-if-changed .`" runs-on: ubuntu-latest steps: - name: Cache Pub hosted dependencies uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 with: path: "~/.pub-cache/hosted" - key: "os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage;commands:format" + key: "os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage-pkgs/sdk_triage_bot;commands:format" restore-keys: | - os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage + os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/corpus-pkgs/dart_flutter_team_lints-pkgs/firehose-pkgs/repo_manage-pkgs/sdk_triage_bot os:ubuntu-latest;pub-cache-hosted;sdk:dev os:ubuntu-latest;pub-cache-hosted os:ubuntu-latest @@ -236,6 +254,15 @@ jobs: run: "dart format --output=none --set-exit-if-changed ." if: "always() && steps.pkgs_repo_manage_pub_upgrade.conclusion == 'success'" working-directory: pkgs/repo_manage + - id: pkgs_sdk_triage_bot_pub_upgrade + name: pkgs/sdk_triage_bot; dart pub upgrade + run: dart pub upgrade + if: "always() && steps.checkout.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + - name: "pkgs/sdk_triage_bot; dart format --output=none --set-exit-if-changed ." + run: "dart format --output=none --set-exit-if-changed ." 
+ if: "always() && steps.pkgs_sdk_triage_bot_pub_upgrade.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot needs: - job_001 job_006: @@ -347,6 +374,42 @@ jobs: - job_004 - job_005 job_009: + name: "unit_test; Dart 3.4.0; PKG: pkgs/sdk_triage_bot; `dart test`" + runs-on: ubuntu-latest + steps: + - name: Cache Pub hosted dependencies + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 + with: + path: "~/.pub-cache/hosted" + key: "os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0;packages:pkgs/sdk_triage_bot;commands:test_1" + restore-keys: | + os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0;packages:pkgs/sdk_triage_bot + os:ubuntu-latest;pub-cache-hosted;sdk:3.4.0 + os:ubuntu-latest;pub-cache-hosted + os:ubuntu-latest + - name: Setup Dart SDK + uses: dart-lang/setup-dart@f0ead981b4d9a35b37f30d36160575d60931ec30 + with: + sdk: "3.4.0" + - id: checkout + name: Checkout repository + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 + - id: pkgs_sdk_triage_bot_pub_upgrade + name: pkgs/sdk_triage_bot; dart pub upgrade + run: dart pub upgrade + if: "always() && steps.checkout.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + - name: pkgs/sdk_triage_bot; dart test + run: dart test + if: "always() && steps.pkgs_sdk_triage_bot_pub_upgrade.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + needs: + - job_001 + - job_002 + - job_003 + - job_004 + - job_005 + job_010: name: "unit_test; Dart dev; PKG: pkgs/corpus; `dart test`" runs-on: ubuntu-latest steps: @@ -382,7 +445,7 @@ jobs: - job_003 - job_004 - job_005 - job_010: + job_011: name: "unit_test; Dart dev; PKG: pkgs/dart_flutter_team_lints; `dart test`" runs-on: ubuntu-latest steps: @@ -418,7 +481,7 @@ jobs: - job_003 - job_004 - job_005 - job_011: + job_012: name: "unit_test; Dart dev; PKG: pkgs/firehose; `dart test`" runs-on: ubuntu-latest steps: @@ -454,7 +517,43 @@ jobs: - job_003 - job_004 - job_005 - job_012: + job_013: + name: "unit_test; Dart dev; PKG: 
pkgs/sdk_triage_bot; `dart test`" + runs-on: ubuntu-latest + steps: + - name: Cache Pub hosted dependencies + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 + with: + path: "~/.pub-cache/hosted" + key: "os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/sdk_triage_bot;commands:test_1" + restore-keys: | + os:ubuntu-latest;pub-cache-hosted;sdk:dev;packages:pkgs/sdk_triage_bot + os:ubuntu-latest;pub-cache-hosted;sdk:dev + os:ubuntu-latest;pub-cache-hosted + os:ubuntu-latest + - name: Setup Dart SDK + uses: dart-lang/setup-dart@f0ead981b4d9a35b37f30d36160575d60931ec30 + with: + sdk: dev + - id: checkout + name: Checkout repository + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 + - id: pkgs_sdk_triage_bot_pub_upgrade + name: pkgs/sdk_triage_bot; dart pub upgrade + run: dart pub upgrade + if: "always() && steps.checkout.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + - name: pkgs/sdk_triage_bot; dart test + run: dart test + if: "always() && steps.pkgs_sdk_triage_bot_pub_upgrade.conclusion == 'success'" + working-directory: pkgs/sdk_triage_bot + needs: + - job_001 + - job_002 + - job_003 + - job_004 + - job_005 + job_014: name: "analyze_format; Dart dev; PKG: pkgs/blast_repo; `dart format --output=none --set-exit-if-changed .`, `dart analyze --fatal-infos .`" runs-on: ubuntu-latest steps: @@ -500,7 +599,9 @@ jobs: - job_009 - job_010 - job_011 - job_013: + - job_012 + - job_013 + job_015: name: "test; Dart dev; PKG: pkgs/blast_repo; `dart test --test-randomize-ordering-seed=random`" runs-on: ubuntu-latest steps: @@ -543,3 +644,5 @@ jobs: - job_010 - job_011 - job_012 + - job_013 + - job_014 diff --git a/pkgs/sdk_triage_bot/.gitignore b/pkgs/sdk_triage_bot/.gitignore new file mode 100644 index 00000000..19cb4832 --- /dev/null +++ b/pkgs/sdk_triage_bot/.gitignore @@ -0,0 +1,11 @@ +# https://dart.dev/guides/libraries/private-files + +# Created by `dart pub` +.dart_tool/ +pubspec.lock + +.env + +tool/training.csv 
+tool/training.jsonl +tool/training.txt diff --git a/pkgs/sdk_triage_bot/README.md b/pkgs/sdk_triage_bot/README.md new file mode 100644 index 00000000..e91ddafe --- /dev/null +++ b/pkgs/sdk_triage_bot/README.md @@ -0,0 +1,20 @@ +## What's this? + +An LLM-based triage automation system for the dart-lang/sdk repo. It processes +new issues filed against the repo and triages them in the same manner that a +human would. This includes: + +- re-summarizing the issue for clarity +- assigning the issue to an `area-` label (first line triage) + +## Bot trigger and entry-point + +TODO: doc + +## Overview + +TODO: doc + +## Tuning + +TODO: doc diff --git a/pkgs/sdk_triage_bot/analysis_options.yaml b/pkgs/sdk_triage_bot/analysis_options.yaml new file mode 100644 index 00000000..d978f811 --- /dev/null +++ b/pkgs/sdk_triage_bot/analysis_options.yaml @@ -0,0 +1 @@ +include: package:dart_flutter_team_lints/analysis_options.yaml diff --git a/pkgs/sdk_triage_bot/bin/triage.dart b/pkgs/sdk_triage_bot/bin/triage.dart new file mode 100644 index 00000000..2541bc6d --- /dev/null +++ b/pkgs/sdk_triage_bot/bin/triage.dart @@ -0,0 +1,83 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'dart:io' as io; + +import 'package:args/args.dart'; +import 'package:github/github.dart'; +import 'package:http/http.dart' as http; +import 'package:sdk_triage_bot/src/common.dart'; +import 'package:sdk_triage_bot/src/gemini.dart'; +import 'package:sdk_triage_bot/src/github.dart'; +import 'package:sdk_triage_bot/triage.dart'; + +void main(List arguments) async { + final argParser = ArgParser(); + argParser.addFlag('dry-run', + negatable: false, + help: 'Perform triage but don\'t make any actual changes to the issue.'); + argParser.addFlag('force', + negatable: false, + help: 'Make changes to the issue even if it already looks triaged.'); + argParser.addFlag('help', + abbr: 'h', negatable: false, help: 'Print this usage information.'); + + final ArgResults results; + try { + results = argParser.parse(arguments); + } on ArgParserException catch (e) { + print(e.message); + print(''); + print(usage); + print(''); + print(argParser.usage); + io.exit(64); + } + + if (results.flag('help') || results.rest.isEmpty) { + print(usage); + print(''); + print(argParser.usage); + io.exit(results.flag('help') ? 0 : 64); + } + + var issue = results.rest.first; + final dryRun = results.flag('dry-run'); + final force = results.flag('force'); + + // Accept either an issue number or a url (i.e., + // https://github.com/dart-lang/sdk/issues/55816). 
+ const sdkToken = 'dart-lang/sdk/issues/'; + if (issue.contains(sdkToken)) { + issue = issue.substring(issue.indexOf(sdkToken) + sdkToken.length); + } + + final client = http.Client(); + + final github = GitHub( + auth: Authentication.withToken(githubToken), + client: client, + ); + final githubService = GithubService(github: github); + + final geminiService = GeminiService( + apiKey: geminiKey, + httpClient: client, + ); + + await triage( + int.parse(issue), + dryRun: dryRun, + force: force, + githubService: githubService, + geminiService: geminiService, + ); + + client.close(); +} + +const String usage = ''' +A tool to triage issues from https://github.com/dart-lang/sdk. + +usage: dart bin/triage.dart [options] '''; diff --git a/pkgs/sdk_triage_bot/lib/src/common.dart b/pkgs/sdk_triage_bot/lib/src/common.dart new file mode 100644 index 00000000..27e0cce5 --- /dev/null +++ b/pkgs/sdk_triage_bot/lib/src/common.dart @@ -0,0 +1,43 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'dart:io'; + +String? 
_envFileTokenOrEnvironment({required String key}) { + final envFile = File('.env'); + if (envFile.existsSync()) { + final env = <String, String>{}; + for (var line in envFile.readAsLinesSync().map((line) => line.trim())) { + if (line.isEmpty || line.startsWith('#') || !line.contains('=')) continue; + var split = line.indexOf('='); + env[line.substring(0, split).trim()] = line.substring(split + 1).trim(); + } + return env[key]; + } else { + return Platform.environment[key]; + } +} + +String get githubToken { + var token = _envFileTokenOrEnvironment(key: 'GITHUB_TOKEN'); + if (token == null) { + throw StateError('This tool expects a github access token in the ' + 'GITHUB_TOKEN environment variable.'); + } + return token; +} + +String get geminiKey { + var token = _envFileTokenOrEnvironment(key: 'GOOGLE_API_KEY'); + if (token == null) { + throw StateError('This tool expects a gemini api key in the ' + 'GOOGLE_API_KEY environment variable.'); + } + return token; +} + +/// Returns [body], truncated to its first 4096 characters (4k) if longer. +String trimmedBody(String body) { + return body.length > 4096 ? body.substring(0, 4096) : body; +} diff --git a/pkgs/sdk_triage_bot/lib/src/gemini.dart b/pkgs/sdk_triage_bot/lib/src/gemini.dart new file mode 100644 index 00000000..65f5e92a --- /dev/null +++ b/pkgs/sdk_triage_bot/lib/src/gemini.dart @@ -0,0 +1,44 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'package:google_generative_ai/google_generative_ai.dart'; +import 'package:http/http.dart' as http; + +class GeminiService { + final GenerativeModel _summarizeModel; + final GenerativeModel _classifyModel; + + GeminiService({ + required String apiKey, + required http.Client httpClient, + }) : _summarizeModel = GenerativeModel( + model: 'models/gemini-1.5-flash-latest', + apiKey: apiKey, + generationConfig: GenerationConfig(temperature: 0.2), + httpClient: httpClient, + ), + _classifyModel = GenerativeModel( + // TODO(devoncarew): substitute our tuned model + // model: 'tunedModels/autotune-sdk-triage-tuned-prompt-1l96e2n', + model: 'models/gemini-1.5-flash-latest', + apiKey: apiKey, + generationConfig: GenerationConfig(temperature: 0.2), + httpClient: httpClient, + ); + + Future<String> summarize(String prompt) { + return _query(_summarizeModel, prompt); + } + + Future<List<String>> classify(String prompt) async { + final result = await _query(_classifyModel, prompt); + final labels = result.split(',').map((l) => l.trim()); + return labels.where((l) => l.isNotEmpty).toList(); + } + + Future<String> _query(GenerativeModel model, String prompt) async { + final response = await model.generateContent([Content.text(prompt)]); + return response.text!.trim(); + } +} diff --git a/pkgs/sdk_triage_bot/lib/src/github.dart b/pkgs/sdk_triage_bot/lib/src/github.dart new file mode 100644 index 00000000..4f0b75a3 --- /dev/null +++ b/pkgs/sdk_triage_bot/lib/src/github.dart @@ -0,0 +1,147 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +// ignore_for_file: avoid_dynamic_calls + +import 'package:github/github.dart'; +import 'package:graphql/client.dart'; + +import 'common.dart'; + +class GithubService { + final GitHub _gitHub; + + GithubService({required GitHub github}) : _gitHub = github; + + Future> getAllLabels(RepositorySlug repoSlug) async { + final result = await _gitHub.issues.listLabels(repoSlug).toList(); + return result.map((item) => item.name).toList(); + } + + Future fetchIssue(RepositorySlug sdkSlug, int issueNumber) async { + return await _gitHub.issues.get(sdkSlug, issueNumber); + } + + Future createComment( + RepositorySlug sdkSlug, int issueNumber, String comment) async { + await _gitHub.issues.createComment(sdkSlug, issueNumber, comment); + } + + Future addLabelsToIssue( + RepositorySlug sdkSlug, int issueNumber, List newLabels) async { + await _gitHub.issues.addLabelsToIssue(sdkSlug, issueNumber, newLabels); + } +} + +Future fetchIssues( + String areaLabel, { + String? cursor, +}) async { + final result = await _query(QueryOptions( + document: gql(_buildQueryString(areaLabel, cursor: cursor)), + fetchPolicy: FetchPolicy.noCache, + parserFn: (data) { + final search = data['search'] as Map; + + // parse issues + final edges = search['edges'] as List; + + final issues = edges.map((data) { + final node = data['node'] as Map; + final labels = (node['labels']['edges'] as List).map((data) { + final node = data['node'] as Map; + return IssueLabel(name: node['name'] as String); + }).toList(); + + return Issue( + title: node['title'] as String, + number: node['number'] as int, + state: node['state'] as String, + bodyText: node['bodyText'] as String?, + labels: labels, + ); + }).toList(); + + // parse cursor + final pageInfo = search['pageInfo'] as Map; + + return FetchIssuesResult( + cursor: pageInfo['endCursor'] as String?, + hasNext: pageInfo['hasNextPage'] as bool, + issues: issues, + ); + }, + )); + + return result.hasException ? throw result.exception! 
: result.parsedData!; +} + +class FetchIssuesResult { + final bool hasNext; + final String? cursor; + final List issues; + + FetchIssuesResult({ + required this.hasNext, + required this.cursor, + required this.issues, + }); + + @override + String toString() => + '[hasNext=$hasNext, cursor=$cursor, issues=${issues.length}]'; +} + +Future> _query(QueryOptions options) { + return _client.query(options); +} + +String _buildQueryString(String areaLabel, {String? cursor}) { + final cursorRef = cursor == null ? null : '"$cursor"'; + + return '''{ + search( + query: "repo:dart-lang/sdk is:issue is:open label:$areaLabel" + type: ISSUE + first: 100, + after: $cursorRef + ) { + edges { + node { + ... on Issue { + title + number + state + bodyText + labels(first: 10) { + edges { + node { + name + } + } + } + } + } + } + pageInfo { + endCursor + startCursor + hasNextPage + hasPreviousPage + } + } +}'''; +} + +final GraphQLClient _client = _initGraphQLClient(); + +GraphQLClient _initGraphQLClient() { + final token = githubToken; + + final auth = AuthLink(getToken: () async => 'Bearer $token'); + return GraphQLClient( + cache: GraphQLCache(), + link: auth.concat(HttpLink('https://api.github.com/graphql')), + ); +} diff --git a/pkgs/sdk_triage_bot/lib/src/prompts.dart b/pkgs/sdk_triage_bot/lib/src/prompts.dart new file mode 100644 index 00000000..2b40cf4a --- /dev/null +++ b/pkgs/sdk_triage_bot/lib/src/prompts.dart @@ -0,0 +1,68 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +String assignAreaPrompt({ + required String title, + required String body, +}) { + return ''' +You are a software engineer on the Dart team at Google. You are responsible for +triaging incoming issues from users. 
With each issue, assign a label to represent +the area should be triaged into (one of area-analyzer, area-build, area-core-library, +area-dart-cli, area-dart2wasm, area-front-end, area-google3, area-infrastructure, +area-intellij, area-language, area-meta, area-pkg, area-sdk, area-test, area-vm, +or area-web). + +Here are the descriptions of the different triage areas: + +area-analyzer: Use area-analyzer for Dart analyzer issues, including the analysis server and code completion. +area-build: Use area-build for SDK build issues. +area-core-library: SDK core library issues (core, async, ...); use area-vm or area-web for platform specific libraries. +area-dart-cli: Use area-dart-cli for issues related to the 'dart' command like tool. +area-dart2wasm: Issues for the dart2wasm compiler. +area-front-end: Use area-front-end for front end / CFE / kernel format related issues. +area-google3: Tracking issues for internal work. Note that this area is not triaged. +area-infrastructure: Use area-infrastructure for SDK infrastructure issues, like continuous integration bot changes. +area-intellij: Tracking issues for the Dart IntelliJ plugin. +area-language: Dart language related items (some items might be better tracked at github.com/dart-lang/language). +area-meta: Cross-cutting, high-level issues (for tracking many other implementation issues, ...). +area-pkg: Used for miscellaneous pkg/ packages not associated with specific area- teams. +area-sdk: Use area-sdk for general purpose SDK issues (packaging, distribution, …). +area-test: Cross-cutting test issues (use area- labels for specific failures; not used for package:test). +area-vm: Use area-vm for VM related issues, including code coverage, FFI, and the AOT and JIT backends. +area-web: Use area-web for Dart web related issues, including the DDC and dart2js compilers and JS interop. + +Don't make up a new area. +Don't use more than one area- label. 
+If it's not clear which area the issue should go in, don't apply an area- label. + +If the issue is clearly a feature request, then also apply the label 'type-enhancement'. +If the issue is clearly a bug report, then also apply the label 'type-bug'. +If the issue is mostly a question, then also apply the label 'type-question'. +Otherwise don't apply a 'type-' label. + +Return the labels as comma separated text. + +Issue follows: + +$title + +$body'''; +} + +String summarizeIssuePrompt({ + required String title, + required String body, +}) { + return ''' +You are a software engineer on the Dart team at Google. You are responsible for +triaging incoming issues from users. For each issue, briefly summarize the issue +(1-2 sentences, 24 words or less). + +Issue follows: + +$title + +$body'''; +} diff --git a/pkgs/sdk_triage_bot/lib/triage.dart b/pkgs/sdk_triage_bot/lib/triage.dart new file mode 100644 index 00000000..3cff5672 --- /dev/null +++ b/pkgs/sdk_triage_bot/lib/triage.dart @@ -0,0 +1,113 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'package:github/github.dart'; + +import 'src/common.dart'; +import 'src/gemini.dart'; +import 'src/github.dart'; +import 'src/prompts.dart'; + +final sdkSlug = RepositorySlug('dart-lang', 'sdk'); + +Future triage( + int issueNumber, { + bool dryRun = false, + bool force = false, + required GithubService githubService, + required GeminiService geminiService, +}) async { + print('Triaging $sdkSlug...'); + print(''); + + // retrieve the issue + final issue = await githubService.fetchIssue(sdkSlug, issueNumber); + print('## issue ${issue.url}'); + print(''); + print('title: ${issue.title}'); + final labels = issue.labels.map((l) => l.name).toList(); + if (labels.isNotEmpty) { + print('labels: ${labels.join(', ')}'); + } + final bodyLines = + issue.body.split('\n').where((l) => l.trim().isNotEmpty).toList(); + print(''); + for (final line in bodyLines.take(4)) { + print(' $line'); + } + print(''); + + // decide if we should triage + final alreadyTriaged = labels.any((l) => l.startsWith('area-')); + if (alreadyTriaged && !force) { + print('Exiting (issue is already triaged).'); + return; + } + + // ask for the summary + var bodyTrimmed = trimmedBody(issue.body); + // TODO(devoncarew): handle safety failures + final summary = await geminiService.summarize( + summarizeIssuePrompt(title: issue.title, body: bodyTrimmed), + ); + print('## gemini summary'); + print(''); + print(summary); + print(''); + + // ask for the 'area-' classification + // TODO(devoncarew): handle safety failures + final classification = await geminiService.classify( + assignAreaPrompt(title: issue.title, body: bodyTrimmed), + ); + print('## gemini classification'); + print(''); + print(classification); + print(''); + + if (dryRun) { + print('Exiting (dry run mode - not applying changes).'); + return; + } + + // perform changes + print('## github comment'); + print(''); + print(summary); + print(''); + print('labels: $classification'); + + var comment = ''; + if (classification.isNotEmpty) { + 
comment += classification.map((l) => '`$l`').join(', '); + comment += '\n'; + } + comment += '> $summary\n'; + + // create github comment + await githubService.createComment(sdkSlug, issueNumber, comment); + + final allLabels = await githubService.getAllLabels(sdkSlug); + var newLabels = filterExistingLabels(allLabels, classification); + if (newLabels.any((l) => l.startsWith('area-'))) { + newLabels.add('triage-automation'); + } + // remove any duplicates + newLabels = newLabels.toSet().toList(); + + // apply github labels + if (newLabels.isNotEmpty) { + await githubService.addLabelsToIssue(sdkSlug, issueNumber, newLabels); + } + + print(''); + print('---'); + print(''); + print('Triaged ${issue.url}.'); +} + +List filterExistingLabels( + List allLabels, List newLabels) { + return newLabels.toSet().intersection(allLabels.toSet()).toList(); +} diff --git a/pkgs/sdk_triage_bot/mono_pkg.yaml b/pkgs/sdk_triage_bot/mono_pkg.yaml new file mode 100644 index 00000000..3043b491 --- /dev/null +++ b/pkgs/sdk_triage_bot/mono_pkg.yaml @@ -0,0 +1,13 @@ +# See https://github.com/google/mono_repo.dart +sdk: +- pubspec +- dev + +stages: +- analyze_and_format: + - analyze: --fatal-infos . + - format: + sdk: + - dev +- unit_test: + - test diff --git a/pkgs/sdk_triage_bot/pubspec.yaml b/pkgs/sdk_triage_bot/pubspec.yaml new file mode 100644 index 00000000..c013b070 --- /dev/null +++ b/pkgs/sdk_triage_bot/pubspec.yaml @@ -0,0 +1,18 @@ +name: sdk_triage_bot +description: A triage automation tool for dart-lang/sdk issues. 
+ +publish_to: none + +environment: + sdk: ^3.4.0 + +dependencies: + args: ^2.5.0 + github: ^9.0.0 + google_generative_ai: ^0.4.0 + graphql: ^5.1.0 + http: ^1.2.0 + +dev_dependencies: + dart_flutter_team_lints: ^3.1.0 + test: ^1.24.0 diff --git a/pkgs/sdk_triage_bot/test/fakes.dart b/pkgs/sdk_triage_bot/test/fakes.dart new file mode 100644 index 00000000..1deee5f7 --- /dev/null +++ b/pkgs/sdk_triage_bot/test/fakes.dart @@ -0,0 +1,57 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:github/github.dart'; +import 'package:sdk_triage_bot/src/gemini.dart'; +import 'package:sdk_triage_bot/src/github.dart'; + +const int mockIssueNumber = 123; + +class GithubServiceMock implements GithubService { + @override + Future> getAllLabels(RepositorySlug repoSlug) async { + return ['area-analyzer', 'area-vm', 'type-enhancement', 'type-bug']; + } + + Issue returnedIssue = Issue( + url: 'https://github.com/dart-lang/sdk/issues/55869', + title: 'Add full support for service ID zones', + number: mockIssueNumber, + body: 'Lorem ipsum.', + labels: [], + ); + + @override + Future fetchIssue(RepositorySlug sdkSlug, int issueNumber) async { + return returnedIssue; + } + + String? updatedComment; + + @override + Future createComment( + RepositorySlug sdkSlug, int issueNumber, String comment) async { + updatedComment = comment; + } + + List? 
updatedLabels; + + @override + Future addLabelsToIssue( + RepositorySlug sdkSlug, int issueNumber, List newLabels) async { + updatedLabels = newLabels; + } +} + +class GeminiServiceStub implements GeminiService { + @override + Future summarize(String prompt) async { + return 'Lorem ipsum.'; + } + + @override + Future> classify(String prompt) async { + return ['area-vm', 'type-bug']; + } +} diff --git a/pkgs/sdk_triage_bot/test/triage_test.dart b/pkgs/sdk_triage_bot/test/triage_test.dart new file mode 100644 index 00000000..28f92316 --- /dev/null +++ b/pkgs/sdk_triage_bot/test/triage_test.dart @@ -0,0 +1,74 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'package:github/github.dart'; +import 'package:sdk_triage_bot/triage.dart'; +import 'package:test/test.dart'; + +import 'fakes.dart'; + +void main() { + test('triages issue', () async { + final githubService = GithubServiceMock(); + final geminiService = GeminiServiceStub(); + + await triage( + mockIssueNumber, + githubService: githubService, + geminiService: geminiService, + ); + + expect(githubService.updatedComment, isNotEmpty); + expect(githubService.updatedComment, contains('Lorem ipsum')); + expect(githubService.updatedLabels, contains(startsWith('area-'))); + expect(githubService.updatedLabels, contains('triage-automation')); + }); + + test('skips triaged issues', () async { + final githubService = GithubServiceMock(); + final geminiService = GeminiServiceStub(); + + githubService.returnedIssue = Issue( + url: 'https://github.com/dart-lang/sdk/issues/55869', + title: 'Add full support for service ID zones', + number: mockIssueNumber, + body: 'Lorem ipsum.', + labels: [IssueLabel(name: 'area-vm')], + ); + + await triage( + mockIssueNumber, + githubService: githubService, + geminiService: geminiService, + ); + + 
expect(githubService.updatedComment, isNull); + expect(githubService.updatedLabels, isNull); + }); + + test('respects --force flag', () async { + final githubService = GithubServiceMock(); + final geminiService = GeminiServiceStub(); + + githubService.returnedIssue = Issue( + url: 'https://github.com/dart-lang/sdk/issues/55869', + title: 'Add full support for service ID zones', + number: mockIssueNumber, + body: 'Lorem ipsum.', + labels: [IssueLabel(name: 'area-vm')], + ); + + await triage( + mockIssueNumber, + githubService: githubService, + geminiService: geminiService, + force: true, + ); + + expect(githubService.updatedComment, isNotEmpty); + expect(githubService.updatedComment, contains('Lorem ipsum')); + expect(githubService.updatedLabels, contains(startsWith('area-'))); + expect(githubService.updatedLabels, contains('triage-automation')); + }); +} diff --git a/pkgs/sdk_triage_bot/tool/create_tuning_data.dart b/pkgs/sdk_triage_bot/tool/create_tuning_data.dart new file mode 100644 index 00000000..05ac6e95 --- /dev/null +++ b/pkgs/sdk_triage_bot/tool/create_tuning_data.dart @@ -0,0 +1,167 @@ +// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'dart:convert'; +import 'dart:io'; + +import 'package:github/github.dart'; +import 'package:sdk_triage_bot/src/common.dart'; +import 'package:sdk_triage_bot/src/github.dart'; +import 'package:sdk_triage_bot/src/prompts.dart'; + +// Here, we download 500-1000 already triaged github issues and create a file +// suitable for tuning a Gemini model (via https://aistudio.google.com/). 
+//
+// - make sure we have more of the more common areas
+// - make sure we have at least 10 items from each area
+
+/// How many already-triaged issues to sample for each `area-` label.
+///
+/// Busier areas get a larger sample; no area gets fewer than 10.
+const Map<String, int> areaSampleCount = {
+  'area-vm': 100,
+  'area-analyzer': 100,
+  'area-web': 100,
+  'area-core-library': 100,
+  'area-front-end': 100,
+  //
+  'area-language': 50,
+  'area-infrastructure': 50,
+  'area-test': 50,
+  'area-dart-cli': 50,
+  //
+  'area-meta': 25,
+  'area-dart2wasm': 25,
+  //
+  'area-sdk': 10,
+  'area-intellij': 10,
+  'area-tools': 10,
+  'area-build': 10,
+  'area-google3': 10,
+};
+
+/// Downloads a sample of issues for every area in [areaSampleCount] and writes
+/// them to `tool/training.csv`, `tool/training.jsonl` and `tool/training.txt`
+/// (paths are relative to the current working directory).
+void main(List<String> args) async {
+  print('Building tuning data...');
+  print('');
+
+  // download issues
+  //
+  // Keyed by issue number so an issue returned for more than one area label
+  // is only emitted once.
+  final issueMap = <int, Issue>{};
+
+  for (final entry in areaSampleCount.entries) {
+    final areaLabel = entry.key;
+    final count = entry.value;
+
+    final results = await downloadIssues(areaLabel, count);
+    print('Downloaded ${results.length} issues from $areaLabel');
+
+    for (final issue in results) {
+      issueMap[issue.number] = issue;
+    }
+  }
+
+  // sort by issue number, highest first
+  final issues = issueMap.values.toList()
+    ..sort((a, b) => b.number - a.number);
+
+  // emit training files
+  final trainingFileCsv = File('tool/training.csv');
+  final trainingFileJsonl = File('tool/training.jsonl');
+  final trainingFileDesc = File('tool/training.txt');
+
+  final trainingDataCsv =
+      issues.map((issue) => issue.trainingRowCSV).join('\n');
+  trainingFileCsv.writeAsStringSync('$trainingDataCsv\n');
+
+  final trainingDataJsonl =
+      issues.map((issue) => issue.trainingRowJsonl).join('\n');
+  trainingFileJsonl.writeAsStringSync('$trainingDataJsonl\n');
+
+  final trainingDesc = issues.map((issue) => issue.trainingDesc).join('\n');
+  trainingFileDesc.writeAsStringSync('$trainingDesc\n');
+
+  print('');
+  print('Wrote training data to ${trainingFileCsv.path}, '
+      '${trainingFileJsonl.path} and ${trainingFileDesc.path}.');
+
+  // NOTE(review): explicit exit; presumably something opened while fetching
+  // issues would otherwise keep the process alive - confirm.
+  exit(0);
+}
+
+/// Collects issues labelled [areaLabel] by calling [fetchIssues] page by page.
+///
+/// Stops as soon as [count] issues have been gathered, when a page comes back
+/// empty, or when the last page has been consumed, whichever happens first.
+/// The returned list can therefore be shorter than [count].
+Future<List<Issue>> downloadIssues(String areaLabel, int count) async {
+  final issues = <Issue>[];
+
+  var result = await fetchIssues(areaLabel);
+
+  while (result.issues.isNotEmpty) {
+    for (final issue in result.issues) {
+      issues.add(issue);
+
+      if (issues.length >= count) {
+        return issues;
+      }
+    }
+
+    if (!result.hasNext) {
+      break;
+    }
+
+    result = await fetchIssues(areaLabel, cursor: result.cursor);
+  }
+
+  return issues;
+}
+
+extension on Issue {
+  /// Names of this issue's labels that start with `area-` or `type-`, in the
+  /// order they appear in [labels].
+  List<String> get _classificationLabels => [
+        for (final label in labels)
+          if (label.name.startsWith('area-') || label.name.startsWith('type-'))
+            label.name,
+      ];
+
+  /// The prompt built by [assignAreaPrompt] from the title and trimmed body.
+  ///
+  /// Throws if `bodyText` is null (it is force-unwrapped).
+  String get _prompt =>
+      assignAreaPrompt(title: title, body: trimmedBody(bodyText!));
+
+  /// One CSV record: the prompt, then the comma-separated classification
+  /// labels, each encoded with [csvEncode].
+  String get trainingRowCSV {
+    final input = _prompt;
+    final output = _classificationLabels.join(', ');
+
+    return '${csvEncode(input)},${csvEncode(output)}';
+  }
+
+  /// One JSONL record: a `messages` list with the prompt as the `user` turn
+  /// and the comma-separated classification labels as the `model` turn.
+  String get trainingRowJsonl {
+    final input = _prompt;
+    final output = _classificationLabels.join(', ');
+
+    return jsonEncode({
+      'messages': [
+        {'role': 'user', 'content': input},
+        {'role': 'model', 'content': output},
+      ],
+    });
+  }
+
+  /// A one-line description: `[number] "title": labels`, with the title cut
+  /// to 80 characters followed by `...` when it is longer than that.
+  String get trainingDesc {
+    final shortTitle =
+        title.length > 80 ? '${title.substring(0, 80)}...' : title;
+
+    return '[$number] "$shortTitle": ${_classificationLabels.join(', ')}';
+  }
+}
+
+/// Encodes [str] as a single CSV field.
+///
+/// Newlines are flattened to a literal ` \n ` marker so each record stays on
+/// one physical line. The field is wrapped in double quotes, with embedded
+/// double quotes doubled, whenever it contains a comma, a single or double
+/// quote, or a space; otherwise it is returned as is.
+String csvEncode(String str) {
+  str = str.replaceAll('\n', r' \n ');
+
+  // An unquoted comma would be read as a field separator and split the field.
+  final needsQuoting = str.contains(',') ||
+      str.contains('"') ||
+      str.contains("'") ||
+      str.contains(' ');
+
+  if (!needsQuoting) {
+    return str;
+  }
+
+  final escaped = str.replaceAll('"', '""');
+  return '"$escaped"';
+}