Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add /api/manifest endpoint #101

Merged
merged 6 commits into from
May 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions util/populate_dev_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func main() {
log.Fatal(err)
}

emptySecretToken := []interface{}{&base.Token{}}
emptySecretToken := &base.Token{}
staticDataTime, _ := time.Parse(time.RFC3339, "2017-10-18T00:00:00Z")

// Follow pattern established in run/*.py data collection code.
Expand Down Expand Up @@ -134,15 +134,17 @@ func main() {
},
}

tokenKindName := "Token"
testRunKindName := "TestRun"
passRateMetadataKindName := metrics.GetDatastoreKindName(
metrics.PassRateMetadata{})
failuresMetadataKindName := metrics.GetDatastoreKindName(
metrics.FailuresMetadata{})

log.Print("Adding local (empty) secrets...")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not very hard to hit the GitHub API quota. Could this try to read a token from a file, an environment variable, or something, and print a warning if none exists?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't manage to exceed it in all of my trials, so I think I'll wait until I see a problem before I solve it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, perhaps the 403s one easily gets with a few fast reloads of https://foolip.github.io/ecosystem-infra-rotation/ are a different kind of problem that you don't get with just a single request, maybe this: https://developer.github.com/v3/#abuse-rate-limits

addSecretToken(ctx, "upload-token", emptySecretToken)
addSecretToken(ctx, "github-api-token", emptySecretToken)

log.Print("Adding local mock data (static/)...")
addData(ctx, tokenKindName, emptySecretToken)
addData(ctx, testRunKindName, staticTestRunMetadata)
addData(ctx, passRateMetadataKindName, staticPassRateMetadata)
addData(ctx, failuresMetadataKindName, staticFailuresMetadata)
Expand All @@ -164,6 +166,14 @@ func main() {
addData(ctx, testRunKindName, latestProductionTestRunMetadata)
}

func addSecretToken(ctx context.Context, id string, data interface{}) {
key := datastore.NewKey(ctx, "Token", id, 0, nil)
if _, err := datastore.Put(ctx, key, data); err != nil {
log.Fatalf("Failed to add %s secret: %s", id, err.Error())
}
log.Printf("Added %s secret", id)
}

func addData(ctx context.Context, kindName string, data []interface{}) {
keys := make([]*datastore.Key, len(data))
for i := range data {
Expand Down
113 changes: 113 additions & 0 deletions webapp/api_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@
package webapp

import (
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"strconv"
"strings"
"time"

models "github.com/web-platform-tests/wpt.fyi/shared"
Expand Down Expand Up @@ -368,3 +371,113 @@ func handleAPIDiffPost(w http.ResponseWriter, r *http.Request) {
}
w.Write(bytes)
}

func apiManifestHandler(w http.ResponseWriter, r *http.Request) {
sha, err := ParseSHAParamFull(r)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
ctx := appengine.NewContext(r)
if manifest, err := getManifestForSHA(ctx, sha); err != nil {
http.Error(w, err.Error(), http.StatusNotFound)
} else {
w.Header().Add("content-type", "application/json")
w.Write(manifest)
}
}

func gitHubSHASearchURL(sha string) string {
return fmt.Sprintf(`https://api.github.com/search/issues?q=SHA:%s+user:w3c+repo:web-platform-tests`, sha)
}

func gitHubReleaseURL(tag string) string {
return fmt.Sprintf(`https://api.github.com/repos/w3c/web-platform-tests/releases/tags/%s`, tag)
}

type gitHubClient interface {
fetch(url string) ([]byte, error)
}

func getManifestForSHA(ctx context.Context, sha string) (manifest []byte, err error) {
// Fetch models.Token entity for GitHub API Token.
tokenKey := datastore.NewKey(ctx, "Token", "github-api-token", 0, nil)
var token models.Token
datastore.Get(ctx, tokenKey, &token)

client := gitHubClientImpl{
Token: &token,
Context: ctx,
}
return getGitHubReleaseAssetForSHA(&client, sha)
}

func getGitHubReleaseAssetForSHA(client gitHubClient, sha string) (manifest []byte, err error) {
// Search for the PR associated with the SHA.
url := gitHubSHASearchURL(sha)
var body []byte
if body, err = client.fetch(url); err != nil {
return nil, err
}

var queryResults map[string]*json.RawMessage
if err = json.Unmarshal(body, &queryResults); err != nil {
return nil, err
}
var issues []map[string]*json.RawMessage
if err = json.Unmarshal(*queryResults["items"], &issues); err != nil {
return nil, err
}
if len(issues) < 1 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to assert len(issues)==1 and perhaps log all anomalies.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assertions should be for things that the programmer thinks is impossible, but logging which requests don't find a manifest SGTM. Except, do we have any server side logs at all currently? The less we log the less we need to think about PII and that kind of thing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AppEngine has built-in logging support. We can just use the standard logging facility provided by the language, and all the logs will go to Google Cloud Stackdriver which has a fancy UI for filtering.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding PII, AppEngine logs requests already. We won't and can't add extra PII if we log some error messages.

return nil, fmt.Errorf("No search results found for SHA %s", sha)
}

// Load the release by the presumed tag name merge_pr_*
var prNumber int
if err = json.Unmarshal(*issues[0]["number"], &prNumber); err != nil {
return nil, err
}

releaseTag := fmt.Sprintf("merge_pr_%d", prNumber)
url = gitHubReleaseURL(releaseTag)
if body, err = client.fetch(url); err != nil {
return nil, err
}

var release map[string]*json.RawMessage
if err = json.Unmarshal(body, &release); err != nil {
return nil, err
}
var assets []map[string]*json.RawMessage
if err = json.Unmarshal(*release["assets"], &assets); err != nil {
return nil, err
}
if len(assets) < 1 {
return nil, fmt.Errorf("No assets found for release %s", releaseTag)
}
// Get (and unzip) the asset with name "MANIFEST-{sha}.json.gz"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to unzip it, or could we set the transfer encoding and just pass along the data?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We played with this a bit. The short answer is that we can't, unless (potentially) we use some dirty hacks.

Basically, GitHub responds a 302 for the download URL to a long AWS S3 URL that has a bunch of query parameters to tell S3 to force a browser download, which means S3 will disregard the Accept-Encoding in request headers, and always return the raw octet stream of the file (which happens to be a gz here).

If we absolutely want to to do it, I guess we can intercept 302 and modify the query parameters, but I guess we don't :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to unzip it anyway, once we get to dumping it in the cache and allowing path filtering, etc.

for _, asset := range assets {
var name string
if err = json.Unmarshal(*asset["name"], &name); err != nil {
return nil, err
}
if strings.Contains(name, sha) {
if err = json.Unmarshal(*asset["browser_download_url"], &url); err != nil {
return nil, err
}

if body, err = client.fetch(url); err != nil {
return nil, err
}
gzReader, err := gzip.NewReader(bytes.NewReader(body))
if err != nil {
return nil, err
}
if body, err = ioutil.ReadAll(gzReader); err != nil {
return nil, err
}
return body, nil
}
}
return nil, fmt.Errorf("No manifest asset found for release %s", releaseTag)
}
96 changes: 96 additions & 0 deletions webapp/api_handlers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2017 The WPT Dashboard Project. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package webapp

import (
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"testing"

"github.com/stretchr/testify/assert"
)

type mockGitHubClient struct {
Responses map[string][]byte
}

func (m *mockGitHubClient) fetch(url string) ([]byte, error) {
if _, ok := m.Responses[url]; !ok {
return nil, fmt.Errorf("fore! oh; for: %s", url)
}
return m.Responses[url], nil
}

func TestGetGitHubReleaseAssetForSHA_SHANotFound(t *testing.T) {
client := mockGitHubClient{}
const sha = "abcdef1234"
manifest, err := getGitHubReleaseAssetForSHA(&client, sha)
assert.Nil(t, manifest)
assert.NotNil(t, err)
}

func TestGetGitHubReleaseAssetForSHA(t *testing.T) {
const sha = "abcdef1234"
searchResults, _ := json.Marshal(
object{
"items": []object{
object{
"number": 123,
},
},
},
)
downloadURL := "http://github.com/magic_url"

releasesJSON := object{
"assets": []object{
object{
"name": "MANIFEST-abcdef1234.json.gz",
"browser_download_url": downloadURL,
},
},
}

var buf bytes.Buffer
zw := gzip.NewWriter(&buf)
zw.Write([]byte("magic data"))
zw.Close()
data := buf.Bytes()

client := mockGitHubClient{
Responses: map[string][]byte{
gitHubSHASearchURL(sha): searchResults,
gitHubReleaseURL("merge_pr_123"): unsafeMarshal(releasesJSON),
downloadURL: data,
},
}

// 1) Data is unzipped.
manifest, err := getGitHubReleaseAssetForSHA(&client, sha)
assert.Nil(t, err)
assert.Equal(t, []byte("magic data"), manifest)

// 2) Correct asset picked when first asset is some other asset.
releasesJSON["assets"] = []object{
object{
"name": "Some other asset.txt",
"browser_download_url": "http://huh.com?",
},
releasesJSON["assets"].([]object)[0],
}
client.Responses[gitHubReleaseURL("merge_pr_123")] = unsafeMarshal(releasesJSON)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wouldn't affect the test, but checked that web-platform-tests/wpt#123 was merged very long ago so that we won't be creating a real tag called merge_pr_123 :)

manifest, err = getGitHubReleaseAssetForSHA(&client, sha)
assert.Nil(t, err)
assert.Equal(t, []byte("magic data"), manifest)

// 3) Error when no matching asset found.
releasesJSON["assets"] = releasesJSON["assets"].([]object)[0:1] // Just the other asset
client.Responses[gitHubReleaseURL("merge_pr_123")] = unsafeMarshal(releasesJSON)
manifest, err = getGitHubReleaseAssetForSHA(&client, sha)
assert.NotNil(t, err)
assert.Nil(t, manifest)
}
41 changes: 41 additions & 0 deletions webapp/github_client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package webapp

import (
"fmt"
"io/ioutil"
"net/http"

models "github.com/web-platform-tests/wpt.fyi/shared"
"golang.org/x/net/context"
"google.golang.org/appengine/urlfetch"
)

type gitHubClientImpl struct {
Token *models.Token
Context context.Context
}

func (g *gitHubClientImpl) fetch(url string) ([]byte, error) {
client := urlfetch.Client(g.Context)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
if g.Token != nil {
req.Header.Add("Authorization", fmt.Sprintf("token %s", g.Token.Secret))
}
var resp *http.Response
if resp, err = client.Do(req); err != nil {
return nil, err
}
defer resp.Body.Close()

var body []byte
if body, err = ioutil.ReadAll(resp.Body); err != nil {
return nil, err
}
if resp.StatusCode != 200 {
return nil, fmt.Errorf("%s returned HTTP status %d:\n%s", url, resp.StatusCode, string(body))
}
return body, nil
}
3 changes: 3 additions & 0 deletions webapp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ func init() {
// API endpoint for diff of two test run summary JSON blobs.
http.HandleFunc("/api/diff", apiDiffHandler)

// API endpoint for fetching a manifest for a commit SHA.
http.HandleFunc("/api/manifest", apiManifestHandler)

// API endpoint for listing all test runs for a given SHA.
http.HandleFunc("/api/runs", apiTestRunsHandler)

Expand Down
18 changes: 14 additions & 4 deletions webapp/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,19 @@ const MaxCountMinValue = 1
// SHARegex is a regex for SHA[0:10] slice of a git hash.
var SHARegex = regexp.MustCompile("[0-9a-fA-F]{10,40}")

// ParseSHAParam parses and validates the 'sha' param for the request.
// It returns "latest" by default (and in error cases).
// ParseSHAParam parses and validates the 'sha' param for the request,
// cropping it to 10 chars. It returns "latest" by default. (and in error cases).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the cropping introduced? It looks like the longest ambiguous shas in wpt are 7 characters long (93d383f, d0b03af, e54fe8f) and it's fairly unlikely we will have a collision, but using full hashes everywhere internally and only accepting shorter ones as input seems like a good idea. But maybe this is necessary because 10-char hashes are parts of the filenames?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Luke's aware of #49 , but many existing code doesn't take full SHAs so I assume this is a transient solution. At least this endpoint now takes a full SHA (despite chopping it internally), which is good.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: This PR doesn't introduce the cropping, it bypasses it (since we want to use the full hash here).

See #46 for the temporary 'fix' as Hexcles describes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh... there was already a runParam[:10] before and this introduces a ParseSHAParamFull. Sorry, didn't finish reading before reacting :/

func ParseSHAParam(r *http.Request) (runSHA string, err error) {
sha, err := ParseSHAParamFull(r)
if err != nil || !SHARegex.MatchString(sha) {
return sha, err
}
return sha[:10], nil
}

// ParseSHAParamFull parses and validates the 'sha' param for the request.
// It returns "latest" by default (and in error cases).
func ParseSHAParamFull(r *http.Request) (runSHA string, err error) {
// Get the SHA for the run being loaded (the first part of the path.)
runSHA = "latest"
params, err := url.ParseQuery(r.URL.RawQuery)
Expand All @@ -40,10 +50,10 @@ func ParseSHAParam(r *http.Request) (runSHA string, err error) {

runParam := params.Get("sha")
if runParam != "" && runParam != "latest" {
runSHA = runParam
if !SHARegex.MatchString(runParam) {
return "", fmt.Errorf("Invalid sha param value: %s", runParam)
return "latest", fmt.Errorf("Invalid sha param value: %s", runParam)
}
runSHA = runParam[:10]
}
return runSHA, err
}
Expand Down
8 changes: 8 additions & 0 deletions webapp/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,20 @@
package webapp

import (
"encoding/json"
"sort"
"testing"

"github.com/stretchr/testify/assert"
)

type object map[string]interface{}

func unsafeMarshal(i interface{}) []byte {
result, _ := json.Marshal(i)
return result
}

func TestGetBrowserNames(t *testing.T) {
names, _ := GetBrowserNames()
assert.True(t, sort.StringsAreSorted(names))
Expand Down