Skip to content

Commit

Permalink
internal/labels,gaby: issue classification
Browse files Browse the repository at this point in the history
Add internal/labels, which can classify issues.
The categories are taken from internal/rules.

Add a page to gaby for manual review of the classification
decisions.

For #64.

Change-Id: Iaea615e9586f7f1af28c2876c4dbc35cea34d44e
Reviewed-on: https://go-review.googlesource.com/c/oscar/+/634935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
  • Loading branch information
jba committed Dec 10, 2024
1 parent 3f2399f commit cbeb27e
Show file tree
Hide file tree
Showing 9 changed files with 436 additions and 2 deletions.
137 changes: 137 additions & 0 deletions internal/gaby/labels.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
"errors"
"fmt"
"net/http"
"slices"
"strconv"
"strings"

"github.com/google/safehtml"
"github.com/google/safehtml/template"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/htmlutil"
"golang.org/x/oscar/internal/labels"
)

// labelsPage holds the fields needed to display the results
// of an issue categorization.
// It is the data passed to the labels page template.
type labelsPage struct {
CommonPage // shared page fields; filled in by setCommonPage

Params labelsParams // the raw parameters
Results []*labelsResult // one entry per categorized issue, in lookup order
Error error // if non-nil, the error to display instead of the result
}

// labelsResult is the categorization outcome for a single issue.
type labelsResult struct {
*github.Issue // the issue we're reporting on
Category labels.Category // the category returned by labels.IssueCategory
Explanation string // the explanation returned alongside the category
BodyHTML safehtml.HTML // the issue body, converted from Markdown to safe HTML
}

// labelsParams holds the raw inputs to the labels form.
type labelsParams struct {
Query string // the issue(s) to look up: a single issue reference, or a "min,max" pair of issue numbers
}

// handleLabels is the HTTP handler for the labels page.
// It builds the page contents from the request and passes them,
// together with the labels page template, to handlePage.
func (g *Gaby) handleLabels(w http.ResponseWriter, r *http.Request) {
	page := g.populateLabelsPage(r)
	handlePage(w, page, labelsPageTmpl)
}

// labelsPageTmpl is the template used by handleLabels. It is created by
// newTemplate from labelsPageTmplFile with an empty function map.
var labelsPageTmpl = newTemplate(labelsPageTmplFile, template.FuncMap{})

// populateLabelsPage returns the contents of the labels page.
//
// The query form value selects the issues to categorize:
//   - empty: only the form is returned, with no results;
//   - "min,max": the issues numbered min through max in the default
//     project (the first entry of g.githubProjects); spaces around the
//     numbers are ignored;
//   - anything else: a single issue, as parsed by parseIssueNumber. If the
//     value names a project, it must be one of g.githubProjects.
//
// Every matching issue found in the database is classified with
// labels.IssueCategory. On any failure p.Error is set and the page is
// returned immediately.
func (g *Gaby) populateLabelsPage(r *http.Request) *labelsPage {
	pm := labelsParams{
		Query: r.FormValue(paramQuery),
	}
	p := &labelsPage{
		Params: pm,
	}
	p.setCommonPage()
	if pm.Query == "" {
		return p
	}

	var project string
	if len(g.githubProjects) > 0 {
		project = g.githubProjects[0] // default to first project
	}
	var issueMin, issueMax int64
	if smin, smax, ok := strings.Cut(pm.Query, ","); ok {
		// Range query. Trim the components so that "1, 5" is accepted
		// as well as "1,5"; strconv.ParseInt rejects surrounding spaces.
		var err1, err2 error
		issueMin, err1 = strconv.ParseInt(strings.TrimSpace(smin), 10, 64)
		issueMax, err2 = strconv.ParseInt(strings.TrimSpace(smax), 10, 64)
		if err := errors.Join(err1, err2); err != nil {
			p.Error = fmt.Errorf("invalid form value %q: %w", pm.Query, err)
			return p
		}
		// Report an inverted range instead of silently showing an empty page.
		// A negative max is passed through unchanged: the database scan
		// treats it as "no upper bound".
		if issueMax >= 0 && issueMin > issueMax {
			p.Error = fmt.Errorf("invalid form value %q: first issue %d is greater than last issue %d", pm.Query, issueMin, issueMax)
			return p
		}
	} else {
		proj, issue, err := parseIssueNumber(pm.Query)
		if err != nil {
			p.Error = fmt.Errorf("invalid form value %q: %w", pm.Query, err)
			return p
		}
		if proj != "" {
			if !slices.Contains(g.githubProjects, proj) {
				p.Error = fmt.Errorf("invalid form value (unrecognized project): %q", pm.Query)
				return p
			}
			project = proj
		}
		issueMin = issue
		issueMax = issue
	}

	// Find issues in database.
	for i := range github.LookupIssues(g.db, project, issueMin, issueMax) {
		cat, exp, err := labels.IssueCategory(r.Context(), g.llm, i)
		if err != nil {
			// The template shows only the error, so stop at the first failure.
			p.Error = err
			return p
		}
		p.Results = append(p.Results, &labelsResult{
			Issue:       i,
			Category:    cat,
			Explanation: exp,
			BodyHTML:    htmlutil.MarkdownToSafeHTML(i.Body),
		})
	}
	return p
}

// setCommonPage sets the embedded CommonPage of p: the page ID and
// description, the stylesheet (shared with the search page), and a form
// whose inputs are derived from p.Params.
func (p *labelsPage) setCommonPage() {
	styles := []safeURL{searchID.CSS()}
	form := Form{
		Inputs:     p.Params.inputs(),
		SubmitText: "categorize",
	}
	p.CommonPage = CommonPage{
		ID:          labelsID,
		Description: "Categorize issues.",
		Styles:      styles,
		Form:        form,
	}
}

// inputs returns the form inputs for the labels page: a single required
// text input for the issue query, pre-filled with the current value of
// pm.Query.
func (pm *labelsParams) inputs() []FormInput {
	issue := FormInput{
		Label:       "issue",
		Type:        "int, int,int or string",
		Description: "the issue(s) to check, as a number, two numbers, or URL (e.g. 1234, golang/go#1234, or https://github.com/golang/go/issues/1234)",
		Name:        safeQuery,
		Required:    true,
		Typed: TextInput{
			ID:    safeQuery,
			Value: pm.Query,
		},
	}
	return []FormInput{issue}
}
4 changes: 4 additions & 0 deletions internal/gaby/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,10 @@ func (g *Gaby) newServer(report func(error, *http.Request)) *http.ServeMux {
// /rules?q=...: generate a list of violated rules for issue q.
mux.HandleFunc(get(rulesID), g.handleRules)

// /labels: display label classifications for issues.
// /labels?q=...: report on the classification for issue q.
mux.HandleFunc(get(labelsID), g.handleLabels)

// /api/search: perform a vector similarity search.
// POST because the arguments to the request are in the body.
mux.HandleFunc("POST /api/search", g.handleSearchAPI)
Expand Down
2 changes: 2 additions & 0 deletions internal/gaby/pages.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
searchID pageID = "search"
dbviewID pageID = "dbview"
rulesID pageID = "rules"
labelsID pageID = "labels"
reviewsID pageID = "reviews"
)

Expand All @@ -32,4 +33,5 @@ var titles = map[pageID]string{
dbviewID: "Database Viewer",
rulesID: "Rule Checker",
reviewsID: "Reviews",
labelsID: "Issue Labels",
}
1 change: 1 addition & 0 deletions internal/gaby/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const (
searchPageTmplFile = "searchpage.tmpl"
overviewPageTmplFile = "overviewpage.tmpl"
rulesPageTmplFile = "rulespage.tmpl"
labelsPageTmplFile = "labelspage.tmpl"
dbviewPageTmplFile = "dbviewpage.tmpl"

// Common template file
Expand Down
39 changes: 39 additions & 0 deletions internal/gaby/tmpl/labelspage.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<!--
Copyright 2024 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
-->
<!doctype html>
<html>
{{template "head" .}}
<body>
{{template "header" .}}

<div class="section" id="result">
{{- with .Error -}}
<p>Error: {{.}}</p>
{{- else -}}
{{- range .Results -}}
<div style="padding-bottom: 3rem">
<table width="40%">
<tr><td>Issue</td><td><a href="{{.HTMLURL}}">#{{.Number}}</a></td></tr>
<tr><td>Title</td><td><strong>{{.Title}}</strong></td></tr>
<tr><td valign="top">Body</td>
<td><details><summary>Contents</summary>{{.BodyHTML}}</details></td>
</tr>
<tr><td>Author</td><td>{{.User.Login}}</td></tr>
<tr><td>State</td><td>{{.State}}</td></tr>
<tr><td>Labels</td>
<td>{{range .Labels}}{{.Name}} {{end}}</td>
</tr>
<tr><td colspan=2 height="10rem"></td></tr>
<tr><td>Category</td><td>{{.Category.Name}} ({{.Category.Description}})</td></tr>
<tr><td valign="top">Explanation</td><td>{{.Explanation}}</td></tr>
</table>
</div>
{{- end}}
{{- end}}
</div>
</body>
</html>

22 changes: 20 additions & 2 deletions internal/github/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ func LookupIssue(db storage.DB, project string, issue int64) (*Issue, error) {
return nil, fmt.Errorf("github.LookupIssue: issue %s#%d not in database", project, issue)
}

// LookupIssues returns an iterator over issues between issueMin and issueMax,
// only consulting the database (not actual GitHub).
// It walks the stored events for the project and yields the typed payload
// of each event whose API is "/issues"; all other events are skipped.
func LookupIssues(db storage.DB, project string, issueMin, issueMax int64) iter.Seq[*Issue] {
	return func(yield func(*Issue) bool) {
		for ev := range events(db, project, issueMin, issueMax) {
			if ev.API != "/issues" {
				continue
			}
			if !yield(ev.Typed.(*Issue)) {
				// The consumer stopped iterating.
				return
			}
		}
	}
}

// An Event is a single GitHub issue event stored in the database.
type Event struct {
DBTime timed.DBTime // when event was last written
Expand Down Expand Up @@ -101,14 +115,18 @@ func CleanBody(body string) string {
// Within a specific API, the events are ordered by increasing ID,
// which corresponds to increasing event time on GitHub.
func (c *Client) Events(project string, issueMin, issueMax int64) iter.Seq[*Event] {
// Delegate to the package-level events function, which needs only the
// database and so can also be used by callers that have no Client.
return events(c.db, project, issueMin, issueMax)
}

func events(db storage.DB, project string, issueMin, issueMax int64) iter.Seq[*Event] {
return func(yield func(*Event) bool) {
start := o(project, issueMin)
if issueMax < 0 {
issueMax = math.MaxInt64
}
end := o(project, issueMax, ordered.Inf)
for t := range timed.Scan(c.db, eventKind, start, end) {
if !yield(decodeEvent(c.db, t)) {
for t := range timed.Scan(db, eventKind, start, end) {
if !yield(decodeEvent(db, t)) {
return
}
}
Expand Down
128 changes: 128 additions & 0 deletions internal/labels/labels.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package labels classifies issues.
package labels

import (
"bytes"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"html/template"
"log"

"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/llm"
)

// A Category is a classification for an issue.
type Category struct {
Name string // internal unique name
Label string // issue tracker label
Description string // text listed next to the name in the LLM prompt
}

// IssueCategory returns the category chosen by the LLM for the issue, along with an explanation
// of why it was chosen.
//
// The prompt consists of categoryPrompt followed by one "name: description"
// line per configured category, plus the issue's title and body.
//
// IssueCategory returns an error if the issue is a pull request, if the LLM
// request fails, if the response is not valid JSON for the response type,
// or if the response names a category that is not configured.
func IssueCategory(ctx context.Context, cgen llm.ContentGenerator, iss *github.Issue) (_ Category, explanation string, err error) {
	if iss.PullRequest != nil {
		return Category{}, "", errors.New("issue is a pull request")
	}

	// Extract issue text into a string.
	//
	// The result is plain text for the LLM, not HTML for a browser, so it
	// must not be entity-escaped: otherwise a title like `x < "y"` reaches
	// the model as `x &lt; &#34;y&#34;`. html/template escapes ordinary
	// strings, so the values are passed as template.HTML, which it emits
	// verbatim. (bodyArgs cannot be used here because its fields are plain
	// strings.)
	var issueText bytes.Buffer
	err = template.Must(template.New("body").Parse(body)).Execute(&issueText, struct {
		Title template.HTML
		Body  template.HTML
	}{
		Title: template.HTML(iss.Title),
		Body:  template.HTML(iss.Body),
	})
	if err != nil {
		return Category{}, "", err
	}

	// Build system prompt to ask about the issue category.
	var systemPrompt bytes.Buffer
	systemPrompt.WriteString(categoryPrompt)
	for _, cat := range config.Categories {
		fmt.Fprintf(&systemPrompt, "%s: %s\n", cat.Name, cat.Description)
	}

	// Ask about the category of the issue.
	jsonRes, err := cgen.GenerateContent(ctx, responseSchema,
		[]llm.Part{llm.Text(systemPrompt.String()), llm.Text(issueText.String())})
	if err != nil {
		// Error strings should not end with a newline.
		return Category{}, "", fmt.Errorf("llm request failed: %w", err)
	}
	var res response
	if err := json.Unmarshal([]byte(jsonRes), &res); err != nil {
		return Category{}, "", fmt.Errorf("unmarshaling %s: %w", jsonRes, err)
	}
	// Accept only a category that is in the configured list.
	for _, cat := range config.Categories {
		if res.CategoryName == cat.Name {
			return cat, res.Explanation, nil
		}
	}
	return Category{}, "", fmt.Errorf("no category matches LLM response %q", jsonRes)
}

// response is the response that should be generated by the LLM.
// It must match [responseSchema].
type response struct {
CategoryName string // compared against Category.Name to select the category
Explanation string // returned to the caller as the explanation
}

// responseSchema is the schema passed to the LLM with each request.
// It describes an object with two string properties, CategoryName and
// Explanation, mirroring the fields of the response type.
var responseSchema = &llm.Schema{
Type: llm.TypeObject,
Properties: map[string]*llm.Schema{
"CategoryName": {
Type: llm.TypeString,
Description: "the kind of issue",
},
"Explanation": {
Type: llm.TypeString,
Description: "an explanation of why the issue belongs to the category",
},
},
}

// categoryPrompt is the fixed opening of the system prompt.
// IssueCategory appends one "name: description" line per configured
// category after it.
const categoryPrompt = `
Your job is to categorize Go issues.
The issue is described by a title and a body.
The issue body is encoded in markdown.
Report the category of the issue and an explanation of your decision.
Each category and its description are listed below.
`
// body is the template text for the issue portion of the prompt.
// It refers to the fields Title and Body of the value it is executed with.
const body = `
The title of the issue is: {{.Title}}
The body of the issue is: {{.Body}}
`

// bodyArgs names the fields referenced by the body template.
type bodyArgs struct {
Title string // the issue title
Body string // the issue body
}

// config is the package configuration, decoded from
// static/categories.json by init.
var config struct {
Categories []Category // the categories an issue can be assigned to
}

// staticFS holds the embedded contents of the static directory,
// which includes categories.json.
//
//go:embed static/*
var staticFS embed.FS

// init loads the category configuration from the embedded
// static/categories.json file into config.
// The program exits via log.Fatal if the file cannot be opened, cannot be
// decoded, or contains fields that config does not declare.
func init() {
	file, openErr := staticFS.Open("static/categories.json")
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer file.Close()

	decoder := json.NewDecoder(file)
	// Reject unrecognized fields so mistakes in the JSON file are caught at startup.
	decoder.DisallowUnknownFields()
	if decodeErr := decoder.Decode(&config); decodeErr != nil {
		log.Fatal(decodeErr)
	}
}
Loading

0 comments on commit cbeb27e

Please sign in to comment.