-
Notifications
You must be signed in to change notification settings - Fork 94
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add /api/manifest endpoint #101
Changes from all commits
0e24d14
1918e6d
274ee45
ec244d5
da904a8
c565a8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,12 +5,15 @@ | |
package webapp | ||
|
||
import ( | ||
"bytes" | ||
"compress/gzip" | ||
"encoding/json" | ||
"fmt" | ||
"io/ioutil" | ||
"net/http" | ||
"net/url" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
models "github.com/web-platform-tests/wpt.fyi/shared" | ||
|
@@ -368,3 +371,113 @@ func handleAPIDiffPost(w http.ResponseWriter, r *http.Request) { | |
} | ||
w.Write(bytes) | ||
} | ||
|
||
func apiManifestHandler(w http.ResponseWriter, r *http.Request) { | ||
sha, err := ParseSHAParamFull(r) | ||
if err != nil { | ||
http.Error(w, err.Error(), http.StatusBadRequest) | ||
return | ||
} | ||
ctx := appengine.NewContext(r) | ||
if manifest, err := getManifestForSHA(ctx, sha); err != nil { | ||
http.Error(w, err.Error(), http.StatusNotFound) | ||
} else { | ||
w.Header().Add("content-type", "application/json") | ||
w.Write(manifest) | ||
} | ||
} | ||
|
||
func gitHubSHASearchURL(sha string) string { | ||
return fmt.Sprintf(`https://api.github.com/search/issues?q=SHA:%s+user:w3c+repo:web-platform-tests`, sha) | ||
} | ||
|
||
func gitHubReleaseURL(tag string) string { | ||
return fmt.Sprintf(`https://api.github.com/repos/w3c/web-platform-tests/releases/tags/%s`, tag) | ||
} | ||
|
||
type gitHubClient interface { | ||
fetch(url string) ([]byte, error) | ||
} | ||
|
||
func getManifestForSHA(ctx context.Context, sha string) (manifest []byte, err error) { | ||
// Fetch models.Token entity for GitHub API Token. | ||
tokenKey := datastore.NewKey(ctx, "Token", "github-api-token", 0, nil) | ||
var token models.Token | ||
datastore.Get(ctx, tokenKey, &token) | ||
|
||
client := gitHubClientImpl{ | ||
Token: &token, | ||
Context: ctx, | ||
} | ||
return getGitHubReleaseAssetForSHA(&client, sha) | ||
} | ||
|
||
func getGitHubReleaseAssetForSHA(client gitHubClient, sha string) (manifest []byte, err error) { | ||
// Search for the PR associated with the SHA. | ||
url := gitHubSHASearchURL(sha) | ||
var body []byte | ||
if body, err = client.fetch(url); err != nil { | ||
return nil, err | ||
} | ||
|
||
var queryResults map[string]*json.RawMessage | ||
if err = json.Unmarshal(body, &queryResults); err != nil { | ||
return nil, err | ||
} | ||
var issues []map[string]*json.RawMessage | ||
if err = json.Unmarshal(*queryResults["items"], &issues); err != nil { | ||
return nil, err | ||
} | ||
if len(issues) < 1 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd like to assert len(issues)==1 and perhaps log all anomalies. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assertions should be for things that the programmer thinks is impossible, but logging which requests don't find a manifest SGTM. Except, do we have any server side logs at all currently? The less we log the less we need to think about PII and that kind of thing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AppEngine has built-in logging support. We can just use the standard logging facility provided by the language, and all the logs will go to Google Cloud Stackdriver which has a fancy UI for filtering. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Regarding PII, AppEngine logs requests already. We won't and can't add extra PII if we log some error messages. |
||
return nil, fmt.Errorf("No search results found for SHA %s", sha) | ||
} | ||
|
||
// Load the release by the presumed tag name merge_pr_* | ||
var prNumber int | ||
if err = json.Unmarshal(*issues[0]["number"], &prNumber); err != nil { | ||
return nil, err | ||
} | ||
|
||
releaseTag := fmt.Sprintf("merge_pr_%d", prNumber) | ||
url = gitHubReleaseURL(releaseTag) | ||
if body, err = client.fetch(url); err != nil { | ||
return nil, err | ||
} | ||
|
||
var release map[string]*json.RawMessage | ||
if err = json.Unmarshal(body, &release); err != nil { | ||
return nil, err | ||
} | ||
var assets []map[string]*json.RawMessage | ||
if err = json.Unmarshal(*release["assets"], &assets); err != nil { | ||
return nil, err | ||
} | ||
if len(assets) < 1 { | ||
return nil, fmt.Errorf("No assets found for release %s", releaseTag) | ||
} | ||
// Get (and unzip) the asset with name "MANIFEST-{sha}.json.gz" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to unzip it, or could we set the transfer encoding and just pass along the data? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We played with this a bit. The short answer is that we can't, unless (potentially) we use some dirty hacks. Basically, GitHub responds a 302 for the download URL to a long AWS S3 URL that has a bunch of query parameters to tell S3 to force a browser download, which means S3 will disregard the If we absolutely want to to do it, I guess we can intercept 302 and modify the query parameters, but I guess we don't :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We want to unzip it anyway, once we get to dumping it in the cache and allowing path filtering, etc. |
||
for _, asset := range assets { | ||
var name string | ||
if err = json.Unmarshal(*asset["name"], &name); err != nil { | ||
return nil, err | ||
} | ||
if strings.Contains(name, sha) { | ||
if err = json.Unmarshal(*asset["browser_download_url"], &url); err != nil { | ||
return nil, err | ||
} | ||
|
||
if body, err = client.fetch(url); err != nil { | ||
return nil, err | ||
} | ||
gzReader, err := gzip.NewReader(bytes.NewReader(body)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if body, err = ioutil.ReadAll(gzReader); err != nil { | ||
return nil, err | ||
} | ||
return body, nil | ||
} | ||
} | ||
return nil, fmt.Errorf("No manifest asset found for release %s", releaseTag) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Copyright 2017 The WPT Dashboard Project. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
|
||
package webapp | ||
|
||
import ( | ||
"bytes" | ||
"compress/gzip" | ||
"encoding/json" | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
type mockGitHubClient struct { | ||
Responses map[string][]byte | ||
} | ||
|
||
func (m *mockGitHubClient) fetch(url string) ([]byte, error) { | ||
if _, ok := m.Responses[url]; !ok { | ||
return nil, fmt.Errorf("fore! oh; for: %s", url) | ||
} | ||
return m.Responses[url], nil | ||
} | ||
|
||
func TestGetGitHubReleaseAssetForSHA_SHANotFound(t *testing.T) { | ||
client := mockGitHubClient{} | ||
const sha = "abcdef1234" | ||
manifest, err := getGitHubReleaseAssetForSHA(&client, sha) | ||
assert.Nil(t, manifest) | ||
assert.NotNil(t, err) | ||
} | ||
|
||
func TestGetGitHubReleaseAssetForSHA(t *testing.T) { | ||
const sha = "abcdef1234" | ||
searchResults, _ := json.Marshal( | ||
object{ | ||
"items": []object{ | ||
object{ | ||
"number": 123, | ||
}, | ||
}, | ||
}, | ||
) | ||
downloadURL := "http://github.com/magic_url" | ||
|
||
releasesJSON := object{ | ||
"assets": []object{ | ||
object{ | ||
"name": "MANIFEST-abcdef1234.json.gz", | ||
"browser_download_url": downloadURL, | ||
}, | ||
}, | ||
} | ||
|
||
var buf bytes.Buffer | ||
zw := gzip.NewWriter(&buf) | ||
zw.Write([]byte("magic data")) | ||
zw.Close() | ||
data := buf.Bytes() | ||
|
||
client := mockGitHubClient{ | ||
Responses: map[string][]byte{ | ||
gitHubSHASearchURL(sha): searchResults, | ||
gitHubReleaseURL("merge_pr_123"): unsafeMarshal(releasesJSON), | ||
downloadURL: data, | ||
}, | ||
} | ||
|
||
// 1) Data is unzipped. | ||
manifest, err := getGitHubReleaseAssetForSHA(&client, sha) | ||
assert.Nil(t, err) | ||
assert.Equal(t, []byte("magic data"), manifest) | ||
|
||
// 2) Correct asset picked when first asset is some other asset. | ||
releasesJSON["assets"] = []object{ | ||
object{ | ||
"name": "Some other asset.txt", | ||
"browser_download_url": "http://huh.com?", | ||
}, | ||
releasesJSON["assets"].([]object)[0], | ||
} | ||
client.Responses[gitHubReleaseURL("merge_pr_123")] = unsafeMarshal(releasesJSON) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It wouldn't affect the test, but checked that web-platform-tests/wpt#123 was merged very long ago so that we won't be creating a real tag called merge_pr_123 :) |
||
manifest, err = getGitHubReleaseAssetForSHA(&client, sha) | ||
assert.Nil(t, err) | ||
assert.Equal(t, []byte("magic data"), manifest) | ||
|
||
// 3) Error when no matching asset found. | ||
releasesJSON["assets"] = releasesJSON["assets"].([]object)[0:1] // Just the other asset | ||
client.Responses[gitHubReleaseURL("merge_pr_123")] = unsafeMarshal(releasesJSON) | ||
manifest, err = getGitHubReleaseAssetForSHA(&client, sha) | ||
assert.NotNil(t, err) | ||
assert.Nil(t, manifest) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package webapp | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
"net/http" | ||
|
||
models "github.com/web-platform-tests/wpt.fyi/shared" | ||
"golang.org/x/net/context" | ||
"google.golang.org/appengine/urlfetch" | ||
) | ||
|
||
type gitHubClientImpl struct { | ||
Token *models.Token | ||
Context context.Context | ||
} | ||
|
||
func (g *gitHubClientImpl) fetch(url string) ([]byte, error) { | ||
client := urlfetch.Client(g.Context) | ||
req, err := http.NewRequest("GET", url, nil) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if g.Token != nil { | ||
req.Header.Add("Authorization", fmt.Sprintf("token %s", g.Token.Secret)) | ||
} | ||
var resp *http.Response | ||
if resp, err = client.Do(req); err != nil { | ||
return nil, err | ||
} | ||
defer resp.Body.Close() | ||
|
||
var body []byte | ||
if body, err = ioutil.ReadAll(resp.Body); err != nil { | ||
return nil, err | ||
} | ||
if resp.StatusCode != 200 { | ||
return nil, fmt.Errorf("%s returned HTTP status %d:\n%s", url, resp.StatusCode, string(body)) | ||
} | ||
return body, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,9 +28,19 @@ const MaxCountMinValue = 1 | |
// SHARegex is a regex for SHA[0:10] slice of a git hash. | ||
var SHARegex = regexp.MustCompile("[0-9a-fA-F]{10,40}") | ||
|
||
// ParseSHAParam parses and validates the 'sha' param for the request. | ||
// It returns "latest" by default (and in error cases). | ||
// ParseSHAParam parses and validates the 'sha' param for the request, | ||
// cropping it to 10 chars. It returns "latest" by default. (and in error cases). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the cropping introduced? It looks like the longest ambiguous shas in wpt are 7 characters long (93d383f, d0b03af, e54fe8f) and it's fairly unlikely we will have a collision, but using full hashes everywhere internally and only accepting shorter ones as input seems like a good idea. But maybe this is necessary because 10-char hashes are parts of the filenames? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think Luke's aware of #49 , but many existing code doesn't take full SHAs so I assume this is a transient solution. At least this endpoint now takes a full SHA (despite chopping it internally), which is good. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: This PR doesn't introduce the cropping, it bypasses it (since we want to use the full hash here). See #46 for the temporary 'fix' as Hexcles describes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh... there was already a |
||
func ParseSHAParam(r *http.Request) (runSHA string, err error) { | ||
sha, err := ParseSHAParamFull(r) | ||
if err != nil || !SHARegex.MatchString(sha) { | ||
return sha, err | ||
} | ||
return sha[:10], nil | ||
} | ||
|
||
// ParseSHAParamFull parses and validates the 'sha' param for the request. | ||
// It returns "latest" by default (and in error cases). | ||
func ParseSHAParamFull(r *http.Request) (runSHA string, err error) { | ||
// Get the SHA for the run being loaded (the first part of the path.) | ||
runSHA = "latest" | ||
params, err := url.ParseQuery(r.URL.RawQuery) | ||
|
@@ -40,10 +50,10 @@ func ParseSHAParam(r *http.Request) (runSHA string, err error) { | |
|
||
runParam := params.Get("sha") | ||
if runParam != "" && runParam != "latest" { | ||
runSHA = runParam | ||
if !SHARegex.MatchString(runParam) { | ||
return "", fmt.Errorf("Invalid sha param value: %s", runParam) | ||
return "latest", fmt.Errorf("Invalid sha param value: %s", runParam) | ||
} | ||
runSHA = runParam[:10] | ||
} | ||
return runSHA, err | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not very hard to hit the GitHub API quota. Could this try to read a token from a file, an environment variable, or something, and print a warning if none exists?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't manage to exceed it in all of my trials, so I think I'll wait until I see a problem before I solve it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, perhaps the 403s one easily gets with a few fast reloads of https://foolip.github.io/ecosystem-infra-rotation/ are a different kind of problem that you don't get with just a single request, maybe this: https://developer.github.com/v3/#abuse-rate-limits