[proxy] /headless-log-download: actually proxy the download from Gitpod's domain to avoid CORS issues
gitpod-io · Sep 14, 2021 · 242cc63 · 242cc63
1 parent b288543
commit 242cc63
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 20 deletions.
diff --git a/components/proxy/conf/Caddyfile b/components/proxy/conf/Caddyfile
@@ -2,6 +2,7 @@
 	# disable automatic SSL certificate generation
 	auto_https off
 	# disable admin API server
+	# admin localhost:2019
 	admin off
 
 	# set default SNI for old clients
@@ -13,7 +14,7 @@
 	# https://caddyserver.com/docs/caddyfile/directives#directive-order
 	order gitpod.cors_origin            before header
 	order gitpod.workspace_download     before redir
-	order gitpod.headless_log_download  before redir
+	order gitpod.headless_log_download	before rewrite
 	order gitpod.sec_websocket_key      before header
 	order http_cache                    before reverse_proxy
 	order gitpod.body_intercept         before redir
@@ -154,23 +155,21 @@ https://{$GITPOD_DOMAIN} {
 			service http://server.{$KUBE_NAMESPACE}.{$KUBE_DOMAIN}:3000
 		}
 
+		# A redirect works here because we "navigate" to this URL, which makes the browser handle it as a primary request, so it does not bother with CORS at all
 		redir {http.gitpod.workspace_download_url} 303
 	}
 
 	@headless_log_download path /headless-log-download*
 	handle @headless_log_download {
-		import google_storage_headers
-
 		header {
-			# The browser needs to see the correct content type to trigger the download.
+			# Although logs are plain text, "text/html" works reliably for streaming
 			content-type "text/html; charset=utf-8"
 		}
 
+		# Perform lookup to server and actual reverse_proxy in one go because caddy's `reverse_proxy` is not powerful enough
 		gitpod.headless_log_download {
 			service http://server.{$KUBE_NAMESPACE}.{$KUBE_DOMAIN}:3000
 		}
-
-		redir {http.gitpod.headless_log_download_url} 303
 	}
 
 	@backend_wss {

diff --git a/components/proxy/plugins/headlesslogdownload/headless_log_download.go b/components/proxy/plugins/headlesslogdownload/headless_log_download.go
@@ -18,7 +18,6 @@ import (
 
 const (
 	headlessLogDownloadModule = "gitpod.headless_log_download"
-	redirectURLVariable       = "http." + headlessLogDownloadModule + "_url"
 )
 
 func init() {
@@ -41,19 +40,19 @@ func (HeadlessLogDownload) CaddyModule() caddy.ModuleInfo {
 
 // ServeHTTP implements caddyhttp.MiddlewareHandler.
 func (m HeadlessLogDownload) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error {
-	repl := r.Context().Value(caddy.ReplacerCtxKey).(*caddy.Replacer)
-
 	query := r.URL.RawQuery
 	if query != "" {
 		query = "?" + query
 	}
 
-	// server has an endpoint on the same path that returns the
-	url := fmt.Sprintf("%v%v%v", m.Service, r.URL.Path, query)
+	// server has an endpoint on the same path that returns the upstream endpoint for the actual download
+	origReq := r.Context().Value(caddyhttp.OriginalRequestCtxKey).(http.Request)
+	u := fmt.Sprintf("%v%v%v", m.Service, origReq.URL.Path, query)
 	client := http.Client{Timeout: 5 * time.Second}
-	req, err := http.NewRequest("GET", url, nil)
+	req, err := http.NewRequest("GET", u, nil)
 	if err != nil {
-		return fmt.Errorf("Server Error: cannot download headless log")
+		caddy.Log().Sugar().Errorf("cannot resolve headless log URL %v: %w", u, err)
+		return fmt.Errorf("server error: cannot resolve headless log URL")
 	}
 
 	// pass browser headers
@@ -64,24 +63,42 @@ func (m HeadlessLogDownload) ServeHTTP(w http.ResponseWriter, r *http.Request, n
 		}
 	}
 
-	// override content-type
-	req.Header.Set("Content-Type", "*/*")
-
+	// query server and parse response
 	resp, err := client.Do(req)
 	if err != nil {
-		return fmt.Errorf("Server Error: cannot download headless log")
+		return fmt.Errorf("server error: cannot resolve headless log URL")
 	}
 	defer resp.Body.Close()
 
 	if resp.StatusCode != http.StatusOK {
 		return fmt.Errorf("Bad Request: /headless-log-download/get returned with code %v", resp.StatusCode)
 	}
 
-	redirectURL, err := io.ReadAll(resp.Body)
+	upstreamURLBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("server error: cannot obtain headless log redirect URL")
+	}
+	upstreamURL := string(upstreamURLBytes)
+
+	// perform the upstream request here
+	resp, err = http.Get(upstreamURL)
+	if err != nil {
+		caddy.Log().Sugar().Errorf("error starting download of prebuild log for %v: %v", upstreamURL, err)
+		return caddyhttp.Error(http.StatusInternalServerError, fmt.Errorf("unexpected error downloading prebuild log"))
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		caddy.Log().Sugar().Errorf("invalid status code downloading prebuild log for %v: %v", upstreamURL, resp.StatusCode)
+		return caddyhttp.Error(http.StatusInternalServerError, fmt.Errorf("unexpected error downloading prebuild log"))
+	}
+
+	brw := newNoBufferResponseWriter(w)
+	_, err = io.Copy(brw, resp.Body)
 	if err != nil {
-		return fmt.Errorf("Server error: cannot obtain headless log redirect URL")
+		caddy.Log().Sugar().Errorf("error proxying prebuild log download for %v: %v", upstreamURL, err)
+		return caddyhttp.Error(http.StatusInternalServerError, fmt.Errorf("unexpected error downloading prebuild log"))
 	}
-	repl.Set(redirectURLVariable, string(redirectURL))
 
 	return next.ServeHTTP(w, r)
 }
@@ -130,3 +147,28 @@ var (
 	_ caddyhttp.MiddlewareHandler = (*HeadlessLogDownload)(nil)
 	_ caddyfile.Unmarshaler       = (*HeadlessLogDownload)(nil)
 )
+
+// noBufferWriter wraps an http.ResponseWriter and flushes after every Write when the wrapped writer supports http.Flusher.
+type noBufferWriter struct {
+	w       http.ResponseWriter // destination that every Write is forwarded to
+	flusher http.Flusher        // w as an http.Flusher, or nil when w does not implement it
+}
+
+func newNoBufferResponseWriter(w http.ResponseWriter) *noBufferWriter { // wraps w in a noBufferWriter
+	writer := &noBufferWriter{
+		w: w,
+	}
+	if flusher, ok := w.(http.Flusher); ok { // flushing is optional: only wired up when w implements http.Flusher
+		writer.flusher = flusher
+	}
+	return writer // flusher is left nil when w cannot flush
+}
+
+func (n *noBufferWriter) Write(p []byte) (written int, err error) { // forwards p to the wrapped writer, then flushes if possible
+	written, err = n.w.Write(p)
+	if n.flusher != nil { // nil when the wrapped writer is not an http.Flusher
+		n.flusher.Flush() // runs regardless of whether the Write above returned an error
+	}
+
+	return // naked return: yields written and err from the wrapped Write
+}