diff --git a/Cargo.lock b/Cargo.lock index fa110811d..f2915289b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -556,6 +556,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "funty" version = "2.0.0" @@ -1133,6 +1139,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "tempdir", "tokio", "tokio-util", "toml", @@ -1806,6 +1813,19 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.7.3" @@ -1850,6 +1870,21 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.5.1" @@ -1899,6 +1934,15 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -2246,6 +2290,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" version = "3.3.0" diff --git a/josh-proxy/Cargo.toml b/josh-proxy/Cargo.toml index 9860d1bc4..e0dda4f25 100644 --- a/josh-proxy/Cargo.toml +++ b/josh-proxy/Cargo.toml @@ -44,3 +44,4 @@ url = "2.3.1" uuid = { version = "1.2.2", features = ["v4"] } josh-rpc = { path = "../josh-rpc" } tokio-util = "0.7.4" +tempdir = "0.3.7" diff --git a/josh-proxy/src/auth.rs b/josh-proxy/src/auth.rs index ffd44e4f8..0d3560850 100644 --- a/josh-proxy/src/auth.rs +++ b/josh-proxy/src/auth.rs @@ -73,13 +73,6 @@ pub async fn check_auth(url: &str, auth: &Handle, required: bool) -> josh::JoshR return Ok(false); } - // If the upsteam is ssh we don't really handle authentication here. - // All we need is a username, the private key is expected to available localy. - // This is really not secure at all and should never be used in a production deployment. - if url.starts_with("ssh") { - return Ok(auth.hash != ""); - } - if let Some(last) = AUTH_TIMERS.lock()?.get(&(url.to_string(), auth.clone())) { let since = std::time::Instant::now().duration_since(*last); tracing::trace!("last: {:?}, since: {:?}", last, since); @@ -99,39 +92,41 @@ pub async fn check_auth(url: &str, auth: &Handle, required: bool) -> josh::JoshR .get(auth) .unwrap_or(&Header { header: None }) .to_owned(); - let nurl = format!("{}/info/refs?service=git-upload-pack", url); + let refs_url = format!("{}/info/refs?service=git-upload-pack", url); - let builder = hyper::Request::builder().method("GET").uri(&nurl); + let builder = hyper::Request::builder() + .method(hyper::Method::GET) + .uri(&refs_url); - let builder = if let Some(h) = password.header { - builder.header("authorization", h) + let builder = if let Some(value) = password.header { + builder.header(hyper::header::AUTHORIZATION, value) } else { builder }; - let r = builder.body(hyper::Body::empty())?; - let resp = client.request(r).await?; + let request = builder.body(hyper::Body::empty())?; + let resp = client.request(request).await?; let status = resp.status(); tracing::trace!("http resp.status {:?}", resp.status()); - let msg = format!("got http response: {} {:?}", nurl, resp); + let err_msg = format!("got http response: {} {:?}", refs_url, resp); - if status == 200 { + if status == hyper::StatusCode::OK { AUTH_TIMERS .lock()? .insert((url.to_string(), auth.clone()), std::time::Instant::now()); Ok(true) - } else if status == 401 { - tracing::warn!("resp.status == 401: {:?}", &msg); + } else if status == hyper::StatusCode::UNAUTHORIZED { + tracing::warn!("resp.status == 401: {:?}", &err_msg); tracing::trace!( "body: {:?}", std::str::from_utf8(&hyper::body::to_bytes(resp.into_body()).await?) ); Ok(false) } else { - return Err(josh::josh_error(&msg)); + return Err(josh::josh_error(&err_msg)); } } @@ -139,7 +134,8 @@ pub fn strip_auth( req: hyper::Request, ) -> josh::JoshResult<(Handle, hyper::Request)> { let mut req = req; - let header: Option = req.headers_mut().remove("authorization"); + let header: Option = + req.headers_mut().remove(hyper::header::AUTHORIZATION); if let Some(header) = header { let hp = Handle { diff --git a/josh-proxy/src/bin/josh-proxy.rs b/josh-proxy/src/bin/josh-proxy.rs index e39e9aada..3b6815dad 100644 --- a/josh-proxy/src/bin/josh-proxy.rs +++ b/josh-proxy/src/bin/josh-proxy.rs @@ -2,7 +2,7 @@ #[macro_use] extern crate lazy_static; -use josh_proxy::{FetchError, MetaConfig, RemoteAuth, RepoConfig, RepoUpdate}; +use josh_proxy::{run_git_with_auth, FetchError, MetaConfig, RemoteAuth, RepoConfig, RepoUpdate}; use opentelemetry::global; use opentelemetry::sdk::propagation::TraceContextPropagator; use tracing_opentelemetry::OpenTelemetrySpanExt; @@ -15,7 +15,7 @@ use hyper::service::{make_service_fn, service_fn}; use hyper::{Request, Response, Server, StatusCode}; use hyper_reverse_proxy; use indoc::formatdoc; -use josh::{josh_error, JoshError}; +use josh::{josh_error, JoshError, JoshResult}; use josh_rpc::calls::RequestedCommand; use serde::Serialize; use std::collections::HashMap; @@ -104,27 +104,30 @@ async fn fetch_upstream( upstream_repo: String, remote_auth: &RemoteAuth, remote_url: String, - headref: &str, + head_ref: Option<&str>, + head_ref_resolved: Option<&str>, force: bool, ) -> Result<(), FetchError> { let key = remote_url.clone(); - let refs_to_fetch = - if !headref.is_empty() && headref != "HEAD" && !headref.starts_with("refs/heads/") { + let refs_to_fetch = match head_ref { + Some(head_ref) if head_ref != "HEAD" && !head_ref.starts_with("refs/heads/") => { vec![ "HEAD*", "refs/josh/*", "refs/heads/*", "refs/tags/*", - headref, + head_ref, ] - } else { + } + _ => { vec!["HEAD*", "refs/josh/*", "refs/heads/*", "refs/tags/*"] - }; + } + }; let refs_to_fetch: Vec<_> = refs_to_fetch.iter().map(|x| x.to_string()).collect(); - let fetch_cached_ok = { + let fetch_timer_ok = { if let Some(last) = service.fetch_timers.read()?.get(&key) { let since = std::time::Instant::now().duration_since(*last); let max = std::time::Duration::from_secs(ARGS.cache_duration); @@ -136,31 +139,42 @@ async fn fetch_upstream( } }; - let fetch_cached_ok = fetch_cached_ok && !force; - - tracing::trace!("fetch_cached_ok {:?}", fetch_cached_ok); - - if fetch_cached_ok && headref.is_empty() { - return Ok(()); - } - - if fetch_cached_ok && !headref.is_empty() { + let resolve_cache_ref = |cache_ref: &str| -> JoshResult> { let transaction = josh::cache::Transaction::open( &service.repo_path.join("mirror"), Some(&format!( "refs/josh/upstream/{}/", &josh::to_ns(&upstream_repo), )), - ) - .map_err(FetchError::from_josh_error)?; - let id = transaction + )?; + + match transaction .repo() - .refname_to_id(&transaction.refname(headref)); - tracing::trace!("refname_to_id: {:?}", id); - if id.is_ok() { - return Ok(()); + .refname_to_id(&transaction.refname(cache_ref)) + { + Ok(oid) => Ok(Some(oid)), + Err(_) => Ok(None), } - } + }; + + match (force, fetch_timer_ok, head_ref, head_ref_resolved) { + (false, true, None, _) => return Ok(()), + (false, true, Some(head_ref), _) => { + if let Some(_) = resolve_cache_ref(head_ref).map_err(FetchError::from_josh_error)? { + trace!("cache ref resolved"); + return Ok(()); + } + } + (false, false, Some(head_ref), Some(head_ref_resolved)) => { + if let Some(oid) = resolve_cache_ref(head_ref).map_err(FetchError::from_josh_error)? { + if oid.to_string() == head_ref_resolved { + trace!("cache ref resolved and matches"); + return Ok(()); + } + } + } + _ => (), + }; let fetch_timers = service.fetch_timers.clone(); let heads_map = service.heads_map.clone(); @@ -200,10 +214,12 @@ async fn fetch_upstream( std::mem::drop(permit); + if let Ok(_) = fetch_result { + fetch_timers.write()?.insert(key, std::time::Instant::now()); + } + match (fetch_result, remote_auth) { (Ok(_), RemoteAuth::Http { auth }) => { - fetch_timers.write()?.insert(key, std::time::Instant::now()); - let (auth_user, _) = auth.parse().map_err(FetchError::from_josh_error)?; if matches!(&ARGS.poll_user, Some(user) if auth_user == user.as_str()) { @@ -459,7 +475,8 @@ async fn query_meta_repo( meta_repo.to_owned(), &remote_auth, remote_url.to_owned(), - &"HEAD", + Some("HEAD"), + None, false, ) .in_current_span() @@ -555,6 +572,67 @@ async fn make_meta_config( } } +async fn ssh_list_refs( + url: &str, + auth_socket: std::path::PathBuf, + refs: Option<&[&str]>, +) -> JoshResult> { + let temp_dir = tempdir::TempDir::new("josh")?; + let refs = match refs { + Some(refs) => refs.to_vec(), + None => vec!["HEAD"], + }; + + let ls_remote = vec!["git", "ls-remote", url]; + let command = ls_remote + .iter() + .chain(refs.iter()) + .map(|s| s.to_string()) + .collect::>(); + + let result = tokio::task::spawn_blocking(move || -> JoshResult<(String, String, i32)> { + let command = command.iter().map(String::as_str).collect::>(); + + let (stdout, stderr, code) = run_git_with_auth( + temp_dir.path(), + &command, + &RemoteAuth::Ssh { auth_socket }, + None, + )?; + + Ok((stdout, stderr, code)) + }) + .await?; + + let stdout = match result { + Ok((stdout, _, 0)) => stdout, + Ok((_, stderr, code)) => { + return Err(josh_error(&format!( + "auth check: git exited with code {}: {}", + code, stderr + ))) + } + Err(e) => return Err(e), + }; + + let refs = stdout + .lines() + .map(|line| { + match line + .split('\t') + .map(str::to_owned) + .collect::>() + .as_slice() + { + [sha1, git_ref] => Ok((git_ref.to_owned(), sha1.to_owned())), + _ => Err(josh_error("could not parse result of ls-remote")), + } + }) + .collect::>>()?; + + Ok(refs) +} + async fn serve_namespace( params: &josh_rpc::calls::ServeNamespace, repo_path: std::path::PathBuf, @@ -772,8 +850,9 @@ async fn handle_serve_namespace_request( )); }; + let auth_socket = params.ssh_socket.clone(); let remote_auth = RemoteAuth::Ssh { - auth_socket: params.ssh_socket.clone(), + auth_socket: auth_socket.clone(), }; let meta_config = match make_meta_config( @@ -813,13 +892,35 @@ async fn handle_serve_namespace_request( let remote_url = upstream + meta_config.config.repo.as_str(); let headref = headref_or_default(&parsed_url.headref); + let remote_refs = [headref.as_str()]; + let remote_refs = match ssh_list_refs(&remote_url, auth_socket, Some(&remote_refs)).await { + Ok(remote_refs) => remote_refs, + Err(e) => { + return Ok(make_response( + hyper::Body::from(e.to_string()), + hyper::StatusCode::FORBIDDEN, + )) + } + }; + + let resolved_ref = match remote_refs.get(&headref) { + Some(resolved_ref) => resolved_ref, + None => { + return Ok(make_response( + hyper::Body::from("Could not resolve remote ref"), + hyper::StatusCode::INTERNAL_SERVER_ERROR, + )) + } + }; + match fetch_upstream( serv.clone(), meta_config.config.repo.to_owned(), &remote_auth, remote_url.to_owned(), - &headref, - true, + Some(&headref), + Some(resolved_ref), + false, ) .await { @@ -1056,7 +1157,8 @@ async fn call_service( meta.config.repo.to_owned(), &remote_auth, remote_url.to_owned(), - &headref, + Some(&headref), + None, false, ) .in_current_span() @@ -1368,7 +1470,8 @@ async fn run_polling(serv: Arc) -> josh::JoshResult<()> { upstream_repo.clone(), &remote_auth, url.clone(), - "", + None, + None, true, ) .in_current_span() @@ -1517,7 +1620,8 @@ async fn serve_graphql( upstream_repo.to_owned(), &remote_auth, remote_url.to_owned(), - &"HEAD", + Some("HEAD"), + None, false, ) .in_current_span() diff --git a/josh-proxy/src/lib.rs b/josh-proxy/src/lib.rs index f0a08c6cf..1668aeff3 100644 --- a/josh-proxy/src/lib.rs +++ b/josh-proxy/src/lib.rs @@ -555,7 +555,7 @@ fn make_ssh_command() -> String { format!("ssh {}", ssh_options.join(" ")) } -fn run_git_with_auth( +pub fn run_git_with_auth( cwd: &std::path::Path, cmd: &[&str], remote_auth: &RemoteAuth, diff --git a/tests/proxy/caching.t b/tests/proxy/caching.t index ad0e26a2f..8c6a2ccf6 100644 --- a/tests/proxy/caching.t +++ b/tests/proxy/caching.t @@ -41,8 +41,8 @@ From http://localhost:8002/real_repo.git:/sub1 + bdc926c...eb6a311 master -> origin/master (forced update) $ git fetch - $ grep -o "fetch_cached_ok true" ${TESTTMP}/josh-proxy.out | uniq - fetch_cached_ok true + $ grep -o "cache ref resolved" ${TESTTMP}/josh-proxy.out | uniq + cache ref resolved $ bash ${TESTDIR}/destroy_test_env.sh "real_repo.git" = [