diff --git a/README.md b/README.md index 89d7a78..6556779 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ There are also some current non-choices that we would like to push into the real * Different versions of packages from the same repository cannot be used * Importable projects that are not bound to the repository root +* Source inference around different import path patterns (e.g., how `github.com/*` or `my_company/*` are handled) ### Choices diff --git a/appveyor.yml b/appveyor.yml index 8f25b03..8c6b1fd 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,7 +12,7 @@ platform: install: - go version - go env - - choco install bzr hg + - choco install bzr - set PATH=C:\Program Files (x86)\Bazaar\;C:\Program Files\Mercurial\;%PATH% build_script: - go get github.com/Masterminds/glide diff --git a/bridge.go b/bridge.go index d09a35a..2aae74b 100644 --- a/bridge.go +++ b/bridge.go @@ -12,33 +12,20 @@ import ( // sourceBridges provide an adapter to SourceManagers that tailor operations // for a single solve run. 
type sourceBridge interface { - getManifestAndLock(pa atom) (Manifest, Lock, error) - listVersions(id ProjectIdentifier) ([]Version, error) - listPackages(id ProjectIdentifier, v Version) (PackageTree, error) + SourceManager // composes SourceManager + verifyRootDir(path string) error computeRootReach() ([]string, error) - revisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) pairRevision(id ProjectIdentifier, r Revision) []Version pairVersion(id ProjectIdentifier, v UnpairedVersion) PairedVersion - repoExists(id ProjectIdentifier) (bool, error) vendorCodeExists(id ProjectIdentifier) (bool, error) matches(id ProjectIdentifier, c Constraint, v Version) bool matchesAny(id ProjectIdentifier, c1, c2 Constraint) bool intersect(id ProjectIdentifier, c1, c2 Constraint) Constraint - verifyRootDir(path string) error - analyzerInfo() (string, *semver.Version) - deduceRemoteRepo(path string) (*remoteRepo, error) } // bridge is an adapter around a proper SourceManager. It provides localized // caching that's tailored to the requirements of a particular solve run. // -// It also performs transformations between ProjectIdentifiers, which is what -// the solver primarily deals in, and ProjectRoot, which is what the -// SourceManager primarily deals in. This separation is helpful because it keeps -// the complexities of deciding what a particular name "means" entirely within -// the solver, while the SourceManager can traffic exclusively in -// globally-unique network names. -// // Finally, it provides authoritative version/constraint operations, ensuring // that any possible approach to a match - even those not literally encoded in // the inputs - is achieved. 
@@ -63,7 +50,7 @@ type bridge struct { // layered on top of the proper SourceManager's cache; the only difference // is that this keeps the versions sorted in the direction required by the // current solve run - vlists map[ProjectRoot][]Version + vlists map[ProjectIdentifier][]Version } // Global factory func to create a bridge. This exists solely to allow tests to @@ -72,38 +59,27 @@ var mkBridge func(*solver, SourceManager) sourceBridge = func(s *solver, sm Sour return &bridge{ sm: sm, s: s, - vlists: make(map[ProjectRoot][]Version), + vlists: make(map[ProjectIdentifier][]Version), } } -func (b *bridge) getManifestAndLock(pa atom) (Manifest, Lock, error) { - if pa.id.ProjectRoot == b.s.params.ImportRoot { +func (b *bridge) GetManifestAndLock(id ProjectIdentifier, v Version) (Manifest, Lock, error) { + if id.ProjectRoot == b.s.params.ImportRoot { return b.s.rm, b.s.rl, nil } - return b.sm.GetManifestAndLock(ProjectRoot(pa.id.netName()), pa.v) + return b.sm.GetManifestAndLock(id, v) } -func (b *bridge) analyzerInfo() (string, *semver.Version) { +func (b *bridge) AnalyzerInfo() (string, *semver.Version) { return b.sm.AnalyzerInfo() } -func (b *bridge) key(id ProjectIdentifier) ProjectRoot { - k := ProjectRoot(id.NetworkName) - if k == "" { - k = id.ProjectRoot - } - - return k -} - -func (b *bridge) listVersions(id ProjectIdentifier) ([]Version, error) { - k := b.key(id) - - if vl, exists := b.vlists[k]; exists { +func (b *bridge) ListVersions(id ProjectIdentifier) ([]Version, error) { + if vl, exists := b.vlists[id]; exists { return vl, nil } - vl, err := b.sm.ListVersions(k) + vl, err := b.sm.ListVersions(id) // TODO(sdboyer) cache errors, too? 
if err != nil { return nil, err @@ -115,18 +91,16 @@ func (b *bridge) listVersions(id ProjectIdentifier) ([]Version, error) { sort.Sort(upgradeVersionSorter(vl)) } - b.vlists[k] = vl + b.vlists[id] = vl return vl, nil } -func (b *bridge) revisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) { - k := b.key(id) - return b.sm.RevisionPresentIn(k, r) +func (b *bridge) RevisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) { + return b.sm.RevisionPresentIn(id, r) } -func (b *bridge) repoExists(id ProjectIdentifier) (bool, error) { - k := b.key(id) - return b.sm.RepoExists(k) +func (b *bridge) SourceExists(id ProjectIdentifier) (bool, error) { + return b.sm.SourceExists(id) } func (b *bridge) vendorCodeExists(id ProjectIdentifier) (bool, error) { @@ -141,7 +115,7 @@ func (b *bridge) vendorCodeExists(id ProjectIdentifier) (bool, error) { } func (b *bridge) pairVersion(id ProjectIdentifier, v UnpairedVersion) PairedVersion { - vl, err := b.listVersions(id) + vl, err := b.ListVersions(id) if err != nil { return nil } @@ -159,7 +133,7 @@ func (b *bridge) pairVersion(id ProjectIdentifier, v UnpairedVersion) PairedVers } func (b *bridge) pairRevision(id ProjectIdentifier, r Revision) []Version { - vl, err := b.listVersions(id) + vl, err := b.ListVersions(id) if err != nil { return nil } @@ -409,14 +383,17 @@ func (b *bridge) listRootPackages() (PackageTree, error) { // // The root project is handled separately, as the source manager isn't // responsible for that code. 
-func (b *bridge) listPackages(id ProjectIdentifier, v Version) (PackageTree, error) { +func (b *bridge) ListPackages(id ProjectIdentifier, v Version) (PackageTree, error) { if id.ProjectRoot == b.s.params.ImportRoot { return b.listRootPackages() } - // FIXME if we're aliasing here, the returned PackageTree will have - // unaliased import paths, which is super not correct - return b.sm.ListPackages(b.key(id), v) + return b.sm.ListPackages(id, v) +} + +func (b *bridge) ExportProject(id ProjectIdentifier, v Version, path string) error { + //return b.sm.ExportProject(id, v, path) + panic("bridge should never be used to ExportProject") } // verifyRoot ensures that the provided path to the project root is in good @@ -432,10 +409,8 @@ func (b *bridge) verifyRootDir(path string) error { return nil } -// deduceRemoteRepo deduces certain network-oriented properties about an import -// path. -func (b *bridge) deduceRemoteRepo(path string) (*remoteRepo, error) { - return deduceRemoteRepo(path) +func (b *bridge) DeduceProjectRoot(ip string) (ProjectRoot, error) { + return b.sm.DeduceProjectRoot(ip) } // versionTypeUnion represents a set of versions that are, within the scope of diff --git a/deduce.go b/deduce.go new file mode 100644 index 0000000..25dc93d --- /dev/null +++ b/deduce.go @@ -0,0 +1,777 @@ +package gps + +import ( + "fmt" + "io" + "net/http" + "net/url" + "path" + "regexp" + "strings" +) + +var ( + gitSchemes = []string{"https", "ssh", "git", "http"} + bzrSchemes = []string{"https", "bzr+ssh", "bzr", "http"} + hgSchemes = []string{"https", "ssh", "http"} + svnSchemes = []string{"https", "http", "svn", "svn+ssh"} +) + +func validateVCSScheme(scheme, typ string) bool { + // everything allows plain ssh + if scheme == "ssh" { + return true + } + + var schemes []string + switch typ { + case "git": + schemes = gitSchemes + case "bzr": + schemes = bzrSchemes + case "hg": + schemes = hgSchemes + case "svn": + schemes = svnSchemes + default: + panic(fmt.Sprint("unsupported 
vcs type", scheme)) + } + + for _, valid := range schemes { + if scheme == valid { + return true + } + } + return false +} + +// Regexes for the different known import path flavors +var ( + // This regex allowed some usernames that github currently disallows. They + // may have allowed them in the past; keeping it in case we need to revert. + //ghRegex = regexp.MustCompile(`^(?Pgithub\.com/([A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`) + ghRegex = regexp.MustCompile(`^(?Pgithub\.com(/[A-Za-z0-9][-A-Za-z0-9]*[A-Za-z0-9]/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) + gpinNewRegex = regexp.MustCompile(`^(?Pgopkg\.in(?:(/[a-zA-Z0-9][-a-zA-Z0-9]+)?)(/[a-zA-Z][-.a-zA-Z0-9]*)\.((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(?:-unstable)?)(?:\.git)?)((?:/[a-zA-Z0-9][-.a-zA-Z0-9]*)*)$`) + //gpinOldRegex = regexp.MustCompile(`^(?Pgopkg\.in/(?:([a-z0-9][-a-z0-9]+)/)?((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)/([a-zA-Z][-a-zA-Z0-9]*)(?:\.git)?)((?:/[a-zA-Z][-a-zA-Z0-9]*)*)$`) + bbRegex = regexp.MustCompile(`^(?Pbitbucket\.org(?P/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) + //lpRegex = regexp.MustCompile(`^(?Plaunchpad\.net/([A-Za-z0-9-._]+)(/[A-Za-z0-9-._]+)?)(/.+)?`) + lpRegex = regexp.MustCompile(`^(?Plaunchpad\.net(/[A-Za-z0-9-._]+))((?:/[A-Za-z0-9_.\-]+)*)?`) + //glpRegex = regexp.MustCompile(`^(?Pgit\.launchpad\.net/([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+)$`) + glpRegex = regexp.MustCompile(`^(?Pgit\.launchpad\.net(/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) + //gcRegex = regexp.MustCompile(`^(?Pcode\.google\.com/[pr]/(?P[a-z0-9\-]+)(\.(?P[a-z0-9\-]+))?)(/[A-Za-z0-9_.\-]+)*$`) + jazzRegex = regexp.MustCompile(`^(?Phub\.jazz\.net(/git/[a-z0-9]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) + apacheRegex = regexp.MustCompile(`^(?Pgit\.apache\.org(/[a-z0-9_.\-]+\.git))((?:/[A-Za-z0-9_.\-]+)*)$`) + vcsExtensionRegex = 
regexp.MustCompile(`^(?P([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?/[A-Za-z0-9_.\-/~]*?\.(?Pbzr|git|hg|svn))((?:/[A-Za-z0-9_.\-]+)*)$`) +) + +// Other helper regexes +var ( + scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) + pathvld = regexp.MustCompile(`^([A-Za-z0-9-]+)(\.[A-Za-z0-9-]+)+(/[A-Za-z0-9-_.~]+)*$`) +) + +func pathDeducerTrie() deducerTrie { + dxt := newDeducerTrie() + + dxt.Insert("github.com/", githubDeducer{regexp: ghRegex}) + dxt.Insert("gopkg.in/", gopkginDeducer{regexp: gpinNewRegex}) + dxt.Insert("bitbucket.org/", bitbucketDeducer{regexp: bbRegex}) + dxt.Insert("launchpad.net/", launchpadDeducer{regexp: lpRegex}) + dxt.Insert("git.launchpad.net/", launchpadGitDeducer{regexp: glpRegex}) + dxt.Insert("hub.jazz.net/", jazzDeducer{regexp: jazzRegex}) + dxt.Insert("git.apache.org/", apacheDeducer{regexp: apacheRegex}) + + return dxt +} + +type pathDeducer interface { + deduceRoot(string) (string, error) + deduceSource(string, *url.URL) (maybeSource, error) +} + +type githubDeducer struct { + regexp *regexp.Regexp +} + +func (m githubDeducer) deduceRoot(path string) (string, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s is not a valid path for a source on github.com", path) + } + + return "github.com" + v[2], nil +} + +func (m githubDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on github.com", path) + } + + u.Host = "github.com" + u.Path = v[2] + + if u.Scheme == "ssh" && u.User != nil && u.User.Username() != "git" { + return nil, fmt.Errorf("github ssh must be accessed via the 'git' user; %s was provided", u.User.Username()) + } else if u.Scheme != "" { + if !validateVCSScheme(u.Scheme, "git") { + return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) + } + if u.Scheme == "ssh" { + u.User = 
url.User("git") + } + return maybeGitSource{url: u}, nil + } + + mb := make(maybeSources, len(gitSchemes)) + for k, scheme := range gitSchemes { + u2 := *u + if scheme == "ssh" { + u2.User = url.User("git") + } + u2.Scheme = scheme + mb[k] = maybeGitSource{url: &u2} + } + + return mb, nil +} + +type bitbucketDeducer struct { + regexp *regexp.Regexp +} + +func (m bitbucketDeducer) deduceRoot(path string) (string, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s is not a valid path for a source on bitbucket.org", path) + } + + return "bitbucket.org" + v[2], nil +} + +func (m bitbucketDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on bitbucket.org", path) + } + + u.Host = "bitbucket.org" + u.Path = v[2] + + // This isn't definitive, but it'll probably catch most + isgit := strings.HasSuffix(u.Path, ".git") || (u.User != nil && u.User.Username() == "git") + ishg := strings.HasSuffix(u.Path, ".hg") || (u.User != nil && u.User.Username() == "hg") + + // TODO(sdboyer) resolve scm ambiguity if needed by querying bitbucket's REST API + if u.Scheme != "" { + validgit, validhg := validateVCSScheme(u.Scheme, "git"), validateVCSScheme(u.Scheme, "hg") + if isgit { + if !validgit { + // This is unreachable for now, as the git schemes are a + // superset of the hg schemes + return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) + } + return maybeGitSource{url: u}, nil + } else if ishg { + if !validhg { + return nil, fmt.Errorf("%s is not a valid scheme for accessing an hg repository", u.Scheme) + } + return maybeHgSource{url: u}, nil + } else if !validgit && !validhg { + return nil, fmt.Errorf("%s is not a valid scheme for accessing either a git or hg repository", u.Scheme) + } + + // No other choice, make an option for both git and hg + return maybeSources{ + 
maybeHgSource{url: u}, + maybeGitSource{url: u}, + }, nil + } + + mb := make(maybeSources, 0) + // git is probably more common, even on bitbucket. however, bitbucket + // appears to fail _extremely_ slowly on git pings (ls-remote) when the + // underlying repository is actually an hg repository, so it's better + // to try hg first. + if !isgit { + for _, scheme := range hgSchemes { + u2 := *u + if scheme == "ssh" { + u2.User = url.User("hg") + } + u2.Scheme = scheme + mb = append(mb, maybeHgSource{url: &u2}) + } + } + + if !ishg { + for _, scheme := range gitSchemes { + u2 := *u + if scheme == "ssh" { + u2.User = url.User("git") + } + u2.Scheme = scheme + mb = append(mb, maybeGitSource{url: &u2}) + } + } + + return mb, nil +} + +type gopkginDeducer struct { + regexp *regexp.Regexp +} + +func (m gopkginDeducer) deduceRoot(p string) (string, error) { + v, err := m.parseAndValidatePath(p) + if err != nil { + return "", err + } + + return v[1], nil +} + +func (m gopkginDeducer) parseAndValidatePath(p string) ([]string, error) { + v := m.regexp.FindStringSubmatch(p) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on gopkg.in", p) + } + + // We duplicate some logic from the gopkg.in server in order to validate the + // import path string without having to make a network request + if strings.Contains(v[4], ".") { + return nil, fmt.Errorf("%s is not a valid import path; gopkg.in only allows major versions (%q instead of %q)", + p, v[4][:strings.Index(v[4], ".")], v[4]) + } + + return v, nil +} + +func (m gopkginDeducer) deduceSource(p string, u *url.URL) (maybeSource, error) { + // Reuse root detection logic for initial validation + v, err := m.parseAndValidatePath(p) + if err != nil { + return nil, err + } + + // Putting a scheme on gopkg.in would be really weird, disallow it + if u.Scheme != "" { + return nil, fmt.Errorf("Specifying alternate schemes on gopkg.in imports is not permitted") + } + + // gopkg.in is always backed by github + 
u.Host = "github.com" + if v[2] == "" { + elem := v[3][1:] + u.Path = path.Join("/go-"+elem, elem) + } else { + u.Path = path.Join(v[2], v[3]) + } + + mb := make(maybeSources, len(gitSchemes)) + for k, scheme := range gitSchemes { + u2 := *u + if scheme == "ssh" { + u2.User = url.User("git") + } + u2.Scheme = scheme + mb[k] = maybeGitSource{url: &u2} + } + + return mb, nil +} + +type launchpadDeducer struct { + regexp *regexp.Regexp +} + +func (m launchpadDeducer) deduceRoot(path string) (string, error) { + // TODO(sdboyer) lp handling is nasty - there's ambiguities which can only really + // be resolved with a metadata request. See https://github.com/golang/go/issues/11436 + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s is not a valid path for a source on launchpad.net", path) + } + + return "launchpad.net" + v[2], nil +} + +func (m launchpadDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on launchpad.net", path) + } + + u.Host = "launchpad.net" + u.Path = v[2] + + if u.Scheme != "" { + if !validateVCSScheme(u.Scheme, "bzr") { + return nil, fmt.Errorf("%s is not a valid scheme for accessing a bzr repository", u.Scheme) + } + return maybeBzrSource{url: u}, nil + } + + mb := make(maybeSources, len(bzrSchemes)) + for k, scheme := range bzrSchemes { + u2 := *u + u2.Scheme = scheme + mb[k] = maybeBzrSource{url: &u2} + } + + return mb, nil +} + +type launchpadGitDeducer struct { + regexp *regexp.Regexp +} + +func (m launchpadGitDeducer) deduceRoot(path string) (string, error) { + // TODO(sdboyer) same ambiguity issues as with normal bzr lp + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s is not a valid path for a source on git.launchpad.net", path) + } + + return "git.launchpad.net" + v[2], nil +} + +func (m launchpadGitDeducer) deduceSource(path string, u 
*url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on git.launchpad.net", path) + } + + u.Host = "git.launchpad.net" + u.Path = v[2] + + if u.Scheme != "" { + if !validateVCSScheme(u.Scheme, "git") { + return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) + } + return maybeGitSource{url: u}, nil + } + + mb := make(maybeSources, len(gitSchemes)) + for k, scheme := range gitSchemes { + u2 := *u + u2.Scheme = scheme + mb[k] = maybeGitSource{url: &u2} + } + + return mb, nil +} + +type jazzDeducer struct { + regexp *regexp.Regexp +} + +func (m jazzDeducer) deduceRoot(path string) (string, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s is not a valid path for a source on hub.jazz.net", path) + } + + return "hub.jazz.net" + v[2], nil +} + +func (m jazzDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on hub.jazz.net", path) + } + + u.Host = "hub.jazz.net" + u.Path = v[2] + + switch u.Scheme { + case "": + u.Scheme = "https" + fallthrough + case "https": + return maybeGitSource{url: u}, nil + default: + return nil, fmt.Errorf("IBM's jazz hub only supports https, %s is not allowed", u.String()) + } +} + +type apacheDeducer struct { + regexp *regexp.Regexp +} + +func (m apacheDeducer) deduceRoot(path string) (string, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s is not a valid path for a source on git.apache.org", path) + } + + return "git.apache.org" + v[2], nil +} + +func (m apacheDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s is not a valid path for a source on git.apache.org", path) + } + + u.Host 
= "git.apache.org" + u.Path = v[2] + + if u.Scheme != "" { + if !validateVCSScheme(u.Scheme, "git") { + return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) + } + return maybeGitSource{url: u}, nil + } + + mb := make(maybeSources, len(gitSchemes)) + for k, scheme := range gitSchemes { + u2 := *u + u2.Scheme = scheme + mb[k] = maybeGitSource{url: &u2} + } + + return mb, nil +} + +type vcsExtensionDeducer struct { + regexp *regexp.Regexp +} + +func (m vcsExtensionDeducer) deduceRoot(path string) (string, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return "", fmt.Errorf("%s contains no vcs extension hints for matching", path) + } + + return v[1], nil +} + +func (m vcsExtensionDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { + v := m.regexp.FindStringSubmatch(path) + if v == nil { + return nil, fmt.Errorf("%s contains no vcs extension hints for matching", path) + } + + switch v[4] { + case "git", "hg", "bzr": + x := strings.SplitN(v[1], "/", 2) + // TODO(sdboyer) is this actually correct for bzr? 
+ u.Host = x[0] + u.Path = "/" + x[1] + + if u.Scheme != "" { + if !validateVCSScheme(u.Scheme, v[4]) { + return nil, fmt.Errorf("%s is not a valid scheme for accessing %s repositories (path %s)", u.Scheme, v[4], path) + } + + switch v[4] { + case "git": + return maybeGitSource{url: u}, nil + case "bzr": + return maybeBzrSource{url: u}, nil + case "hg": + return maybeHgSource{url: u}, nil + } + } + + var schemes []string + var mb maybeSources + var f func(k int, u *url.URL) + + switch v[4] { + case "git": + schemes = gitSchemes + f = func(k int, u *url.URL) { + mb[k] = maybeGitSource{url: u} + } + case "bzr": + schemes = bzrSchemes + f = func(k int, u *url.URL) { + mb[k] = maybeBzrSource{url: u} + } + case "hg": + schemes = hgSchemes + f = func(k int, u *url.URL) { + mb[k] = maybeHgSource{url: u} + } + } + + mb = make(maybeSources, len(schemes)) + for k, scheme := range schemes { + u2 := *u + u2.Scheme = scheme + f(k, &u2) + } + + return mb, nil + default: + return nil, fmt.Errorf("unknown repository type: %q", v[4]) + } +} + +type stringFuture func() (string, error) +type sourceFuture func() (source, string, error) +type partialSourceFuture func(string, ProjectAnalyzer) sourceFuture + +type deductionFuture struct { + // rslow indicates that the root future may be a slow call (that it has to + // hit the network for some reason) + rslow bool + root stringFuture + psf partialSourceFuture +} + +// deduceFromPath takes an import path and attempts to deduce various +// metadata about it - what type of source should handle it, and where its +// "root" is (for vcs repositories, the repository root). +// +// The results are wrapped in futures, as most of these operations require at +// least some network activity to complete. For the first return value, network +// activity will be triggered when the future is called. For the second, +// network activity is triggered only when calling the sourceFuture returned +// from the partialSourceFuture. 
+func (sm *SourceMgr) deduceFromPath(path string) (deductionFuture, error) { + opath := path + u, path, err := normalizeURI(path) + if err != nil { + return deductionFuture{}, err + } + + // Helpers to futurize the results from deducers + strfut := func(s string) stringFuture { + return func() (string, error) { + return s, nil + } + } + + srcfut := func(mb maybeSource) partialSourceFuture { + return func(cachedir string, an ProjectAnalyzer) sourceFuture { + var src source + var ident string + var err error + + c := make(chan struct{}, 1) + go func() { + defer close(c) + src, ident, err = mb.try(cachedir, an) + }() + + return func() (source, string, error) { + <-c + return src, ident, err + } + } + } + + // First, try the root path-based matches + if _, mtchi, has := sm.dxt.LongestPrefix(path); has { + mtch := mtchi.(pathDeducer) + root, err := mtch.deduceRoot(path) + if err != nil { + return deductionFuture{}, err + } + mb, err := mtch.deduceSource(path, u) + if err != nil { + return deductionFuture{}, err + } + + return deductionFuture{ + rslow: false, + root: strfut(root), + psf: srcfut(mb), + }, nil + } + + // Next, try the vcs extension-based (infix) matcher + exm := vcsExtensionDeducer{regexp: vcsExtensionRegex} + if root, err := exm.deduceRoot(path); err == nil { + mb, err := exm.deduceSource(path, u) + if err != nil { + return deductionFuture{}, err + } + + return deductionFuture{ + rslow: false, + root: strfut(root), + psf: srcfut(mb), + }, nil + } + + // No luck so far. maybe it's one of them vanity imports? 
+ // We have to get a little fancier for the metadata lookup by chaining the + // source future onto the metadata future + + // Declare these out here so they're available for the source future + var vcs string + var ru *url.URL + + // Kick off the vanity metadata fetch + var importroot string + var futerr error + c := make(chan struct{}, 1) + go func() { + defer close(c) + var reporoot string + importroot, vcs, reporoot, futerr = parseMetadata(path) + if futerr != nil { + futerr = fmt.Errorf("unable to deduce repository and source type for: %q", opath) + return + } + + // If we got something back at all, then it supercedes the actual input for + // the real URL to hit + ru, futerr = url.Parse(reporoot) + if futerr != nil { + futerr = fmt.Errorf("server returned bad URL when searching for vanity import: %q", reporoot) + importroot = "" + return + } + }() + + // Set up the root func to catch the result + root := func() (string, error) { + <-c + return importroot, futerr + } + + src := func(cachedir string, an ProjectAnalyzer) sourceFuture { + var src source + var ident string + var err error + + c := make(chan struct{}, 1) + go func() { + defer close(c) + // make sure the metadata future is finished (without errors), thus + // guaranteeing that ru and vcs will be populated + _, err := root() + if err != nil { + return + } + ident = ru.String() + + var m maybeSource + switch vcs { + case "git": + m = maybeGitSource{url: ru} + case "bzr": + m = maybeBzrSource{url: ru} + case "hg": + m = maybeHgSource{url: ru} + } + + if m != nil { + src, ident, err = m.try(cachedir, an) + } else { + err = fmt.Errorf("unsupported vcs type %s", vcs) + } + }() + + return func() (source, string, error) { + <-c + return src, ident, err + } + } + + return deductionFuture{ + rslow: true, + root: root, + psf: src, + }, nil +} + +func normalizeURI(p string) (u *url.URL, newpath string, err error) { + if m := scpSyntaxRe.FindStringSubmatch(p); m != nil { + // Match SCP-like syntax and convert 
it to a URL. + // Eg, "git@github.com:user/repo" becomes + // "ssh://git@github.com/user/repo". + u = &url.URL{ + Scheme: "ssh", + User: url.User(m[1]), + Host: m[2], + Path: "/" + m[3], + // TODO(sdboyer) This is what stdlib sets; grok why better + //RawPath: m[3], + } + } else { + u, err = url.Parse(p) + if err != nil { + return nil, "", fmt.Errorf("%q is not a valid URI", p) + } + } + + // If no scheme was passed, then the entire path will have been put into + // u.Path. Either way, construct the normalized path correctly. + if u.Host == "" { + newpath = p + } else { + newpath = path.Join(u.Host, u.Path) + } + + if !pathvld.MatchString(newpath) { + return nil, "", fmt.Errorf("%q is not a valid import path", newpath) + } + + return +} + +// fetchMetadata fetches the remote metadata for path. +func fetchMetadata(path string) (rc io.ReadCloser, err error) { + defer func() { + if err != nil { + err = fmt.Errorf("unable to determine remote metadata protocol: %s", err) + } + }() + + // try https first + rc, err = doFetchMetadata("https", path) + if err == nil { + return + } + + rc, err = doFetchMetadata("http", path) + return +} + +func doFetchMetadata(scheme, path string) (io.ReadCloser, error) { + url := fmt.Sprintf("%s://%s?go-get=1", scheme, path) + switch scheme { + case "https", "http": + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to access url %q", url) + } + return resp.Body, nil + default: + return nil, fmt.Errorf("unknown remote protocol scheme: %q", scheme) + } +} + +// parseMetadata fetches and decodes remote metadata for path. 
+func parseMetadata(path string) (string, string, string, error) { + rc, err := fetchMetadata(path) + if err != nil { + return "", "", "", err + } + defer rc.Close() + + imports, err := parseMetaGoImports(rc) + if err != nil { + return "", "", "", err + } + match := -1 + for i, im := range imports { + if !strings.HasPrefix(path, im.Prefix) { + continue + } + if match != -1 { + return "", "", "", fmt.Errorf("multiple meta tags match import path %q", path) + } + match = i + } + if match == -1 { + return "", "", "", fmt.Errorf("go-import metadata not found") + } + return imports[match].Prefix, imports[match].VCS, imports[match].RepoRoot, nil +} diff --git a/deduce_test.go b/deduce_test.go new file mode 100644 index 0000000..23ffe38 --- /dev/null +++ b/deduce_test.go @@ -0,0 +1,619 @@ +package gps + +import ( + "bytes" + "errors" + "fmt" + "net/url" + "reflect" + "sync" + "testing" +) + +type pathDeductionFixture struct { + in string + root string + rerr error + mb maybeSource + srcerr error +} + +// helper func to generate testing *url.URLs, panicking on err +func mkurl(s string) (u *url.URL) { + var err error + u, err = url.Parse(s) + if err != nil { + panic(fmt.Sprint("string is not a valid URL:", s)) + } + return +} + +var pathDeductionFixtures = map[string][]pathDeductionFixture{ + "github": []pathDeductionFixture{ + { + in: "github.com/sdboyer/gps", + root: "github.com/sdboyer/gps", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("git://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("http://github.com/sdboyer/gps")}, + }, + }, + { + in: "github.com/sdboyer/gps/foo", + root: "github.com/sdboyer/gps", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("git://github.com/sdboyer/gps")}, + 
maybeGitSource{url: mkurl("http://github.com/sdboyer/gps")}, + }, + }, + { + // TODO(sdboyer) is this a problem for enforcing uniqueness? do we + // need to collapse these extensions? + in: "github.com/sdboyer/gps.git/foo", + root: "github.com/sdboyer/gps.git", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/sdboyer/gps.git")}, + maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps.git")}, + maybeGitSource{url: mkurl("git://github.com/sdboyer/gps.git")}, + maybeGitSource{url: mkurl("http://github.com/sdboyer/gps.git")}, + }, + }, + { + in: "git@github.com:sdboyer/gps", + root: "github.com/sdboyer/gps", + mb: maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps")}, + }, + { + in: "https://github.com/sdboyer/gps", + root: "github.com/sdboyer/gps", + mb: maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + }, + { + in: "https://github.com/sdboyer/gps/foo/bar", + root: "github.com/sdboyer/gps", + mb: maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + }, + // some invalid github username patterns + { + in: "github.com/-sdboyer/gps/foo", + rerr: errors.New("github.com/-sdboyer/gps/foo is not a valid path for a source on github.com"), + }, + { + in: "github.com/sdboyer-/gps/foo", + rerr: errors.New("github.com/sdboyer-/gps/foo is not a valid path for a source on github.com"), + }, + { + in: "github.com/sdbo.yer/gps/foo", + rerr: errors.New("github.com/sdbo.yer/gps/foo is not a valid path for a source on github.com"), + }, + { + in: "github.com/sdbo_yer/gps/foo", + rerr: errors.New("github.com/sdbo_yer/gps/foo is not a valid path for a source on github.com"), + }, + // Regression - gh does allow two-letter usernames + { + in: "github.com/kr/pretty", + root: "github.com/kr/pretty", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/kr/pretty")}, + maybeGitSource{url: mkurl("ssh://git@github.com/kr/pretty")}, + maybeGitSource{url: mkurl("git://github.com/kr/pretty")}, + maybeGitSource{url: 
mkurl("http://github.com/kr/pretty")}, + }, + }, + }, + "gopkg.in": []pathDeductionFixture{ + { + in: "gopkg.in/sdboyer/gps.v0", + root: "gopkg.in/sdboyer/gps.v0", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("git://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("http://github.com/sdboyer/gps")}, + }, + }, + { + in: "gopkg.in/sdboyer/gps.v0/foo", + root: "gopkg.in/sdboyer/gps.v0", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("git://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("http://github.com/sdboyer/gps")}, + }, + }, + { + in: "gopkg.in/sdboyer/gps.v1/foo/bar", + root: "gopkg.in/sdboyer/gps.v1", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("ssh://git@github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("git://github.com/sdboyer/gps")}, + maybeGitSource{url: mkurl("http://github.com/sdboyer/gps")}, + }, + }, + { + in: "gopkg.in/yaml.v1", + root: "gopkg.in/yaml.v1", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/go-yaml/yaml")}, + maybeGitSource{url: mkurl("ssh://git@github.com/go-yaml/yaml")}, + maybeGitSource{url: mkurl("git://github.com/go-yaml/yaml")}, + maybeGitSource{url: mkurl("http://github.com/go-yaml/yaml")}, + }, + }, + { + in: "gopkg.in/yaml.v1/foo/bar", + root: "gopkg.in/yaml.v1", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://github.com/go-yaml/yaml")}, + maybeGitSource{url: mkurl("ssh://git@github.com/go-yaml/yaml")}, + maybeGitSource{url: mkurl("git://github.com/go-yaml/yaml")}, + maybeGitSource{url: mkurl("http://github.com/go-yaml/yaml")}, + }, + }, + { + in: "gopkg.in/inf.v0", + root: "gopkg.in/inf.v0", + mb: maybeSources{ + maybeGitSource{url: 
mkurl("https://github.com/go-inf/inf")}, + maybeGitSource{url: mkurl("ssh://git@github.com/go-inf/inf")}, + maybeGitSource{url: mkurl("git://github.com/go-inf/inf")}, + maybeGitSource{url: mkurl("http://github.com/go-inf/inf")}, + }, + }, + { + // gopkg.in only allows specifying major version in import path + in: "gopkg.in/yaml.v1.2", + rerr: errors.New("gopkg.in/yaml.v1.2 is not a valid import path; gopkg.in only allows major versions (\"v1\" instead of \"v1.2\")"), + }, + }, + "jazz": []pathDeductionFixture{ + // IBM hub devops services - fixtures borrowed from go get + { + in: "hub.jazz.net/git/user1/pkgname", + root: "hub.jazz.net/git/user1/pkgname", + mb: maybeGitSource{url: mkurl("https://hub.jazz.net/git/user1/pkgname")}, + }, + { + in: "hub.jazz.net/git/user1/pkgname/submodule/submodule/submodule", + root: "hub.jazz.net/git/user1/pkgname", + mb: maybeGitSource{url: mkurl("https://hub.jazz.net/git/user1/pkgname")}, + }, + { + in: "hub.jazz.net/someotherprefix", + rerr: errors.New("hub.jazz.net/someotherprefix is not a valid path for a source on hub.jazz.net"), + }, + { + in: "hub.jazz.net/someotherprefix/user1/packagename", + rerr: errors.New("hub.jazz.net/someotherprefix/user1/packagename is not a valid path for a source on hub.jazz.net"), + }, + // Spaces are not valid in user names or package names + { + in: "hub.jazz.net/git/User 1/pkgname", + rerr: errors.New("hub.jazz.net/git/User 1/pkgname is not a valid path for a source on hub.jazz.net"), + }, + { + in: "hub.jazz.net/git/user1/pkg name", + rerr: errors.New("hub.jazz.net/git/user1/pkg name is not a valid path for a source on hub.jazz.net"), + }, + // Dots are not valid in user names + { + in: "hub.jazz.net/git/user.1/pkgname", + rerr: errors.New("hub.jazz.net/git/user.1/pkgname is not a valid path for a source on hub.jazz.net"), + }, + { + in: "hub.jazz.net/git/user1/pkg.name", + root: "hub.jazz.net/git/user1/pkg.name", + mb: maybeGitSource{url: mkurl("https://hub.jazz.net/git/user1/pkg.name")}, + }, 
+ // User names cannot have uppercase letters + { + in: "hub.jazz.net/git/USER/pkgname", + rerr: errors.New("hub.jazz.net/git/USER/pkgname is not a valid path for a source on hub.jazz.net"), + }, + }, + "bitbucket": []pathDeductionFixture{ + { + in: "bitbucket.org/sdboyer/reporoot", + root: "bitbucket.org/sdboyer/reporoot", + mb: maybeSources{ + maybeHgSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot")}, + maybeHgSource{url: mkurl("ssh://hg@bitbucket.org/sdboyer/reporoot")}, + maybeHgSource{url: mkurl("http://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("ssh://git@bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("git://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("http://bitbucket.org/sdboyer/reporoot")}, + }, + }, + { + in: "bitbucket.org/sdboyer/reporoot/foo/bar", + root: "bitbucket.org/sdboyer/reporoot", + mb: maybeSources{ + maybeHgSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot")}, + maybeHgSource{url: mkurl("ssh://hg@bitbucket.org/sdboyer/reporoot")}, + maybeHgSource{url: mkurl("http://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("ssh://git@bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("git://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("http://bitbucket.org/sdboyer/reporoot")}, + }, + }, + { + in: "https://bitbucket.org/sdboyer/reporoot/foo/bar", + root: "bitbucket.org/sdboyer/reporoot", + mb: maybeSources{ + maybeHgSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot")}, + maybeGitSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot")}, + }, + }, + // Less standard behaviors possible due to the hg/git ambiguity + { + in: "bitbucket.org/sdboyer/reporoot.git", + root: "bitbucket.org/sdboyer/reporoot.git", + mb: maybeSources{ + maybeGitSource{url: 
mkurl("https://bitbucket.org/sdboyer/reporoot.git")}, + maybeGitSource{url: mkurl("ssh://git@bitbucket.org/sdboyer/reporoot.git")}, + maybeGitSource{url: mkurl("git://bitbucket.org/sdboyer/reporoot.git")}, + maybeGitSource{url: mkurl("http://bitbucket.org/sdboyer/reporoot.git")}, + }, + }, + { + in: "git@bitbucket.org:sdboyer/reporoot.git", + root: "bitbucket.org/sdboyer/reporoot.git", + mb: maybeGitSource{url: mkurl("ssh://git@bitbucket.org/sdboyer/reporoot.git")}, + }, + { + in: "bitbucket.org/sdboyer/reporoot.hg", + root: "bitbucket.org/sdboyer/reporoot.hg", + mb: maybeSources{ + maybeHgSource{url: mkurl("https://bitbucket.org/sdboyer/reporoot.hg")}, + maybeHgSource{url: mkurl("ssh://hg@bitbucket.org/sdboyer/reporoot.hg")}, + maybeHgSource{url: mkurl("http://bitbucket.org/sdboyer/reporoot.hg")}, + }, + }, + { + in: "hg@bitbucket.org:sdboyer/reporoot", + root: "bitbucket.org/sdboyer/reporoot", + mb: maybeHgSource{url: mkurl("ssh://hg@bitbucket.org/sdboyer/reporoot")}, + }, + { + in: "git://bitbucket.org/sdboyer/reporoot.hg", + root: "bitbucket.org/sdboyer/reporoot.hg", + srcerr: errors.New("git is not a valid scheme for accessing an hg repository"), + }, + }, + "launchpad": []pathDeductionFixture{ + // tests for launchpad, mostly bazaar + // TODO(sdboyer) need more tests to deal w/launchpad's oddities + { + in: "launchpad.net/govcstestbzrrepo", + root: "launchpad.net/govcstestbzrrepo", + mb: maybeSources{ + maybeBzrSource{url: mkurl("https://launchpad.net/govcstestbzrrepo")}, + maybeBzrSource{url: mkurl("bzr+ssh://launchpad.net/govcstestbzrrepo")}, + maybeBzrSource{url: mkurl("bzr://launchpad.net/govcstestbzrrepo")}, + maybeBzrSource{url: mkurl("http://launchpad.net/govcstestbzrrepo")}, + }, + }, + { + in: "launchpad.net/govcstestbzrrepo/foo/bar", + root: "launchpad.net/govcstestbzrrepo", + mb: maybeSources{ + maybeBzrSource{url: mkurl("https://launchpad.net/govcstestbzrrepo")}, + maybeBzrSource{url: mkurl("bzr+ssh://launchpad.net/govcstestbzrrepo")}, + 
maybeBzrSource{url: mkurl("bzr://launchpad.net/govcstestbzrrepo")}, + maybeBzrSource{url: mkurl("http://launchpad.net/govcstestbzrrepo")}, + }, + }, + { + in: "launchpad.net/repo root", + rerr: errors.New("launchpad.net/repo root is not a valid path for a source on launchpad.net"), + }, + }, + "git.launchpad": []pathDeductionFixture{ + { + in: "git.launchpad.net/reporoot", + root: "git.launchpad.net/reporoot", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://git.launchpad.net/reporoot")}, + maybeGitSource{url: mkurl("ssh://git.launchpad.net/reporoot")}, + maybeGitSource{url: mkurl("git://git.launchpad.net/reporoot")}, + maybeGitSource{url: mkurl("http://git.launchpad.net/reporoot")}, + }, + }, + { + in: "git.launchpad.net/reporoot/foo/bar", + root: "git.launchpad.net/reporoot", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://git.launchpad.net/reporoot")}, + maybeGitSource{url: mkurl("ssh://git.launchpad.net/reporoot")}, + maybeGitSource{url: mkurl("git://git.launchpad.net/reporoot")}, + maybeGitSource{url: mkurl("http://git.launchpad.net/reporoot")}, + }, + }, + { + in: "git.launchpad.net/repo root", + rerr: errors.New("git.launchpad.net/repo root is not a valid path for a source on launchpad.net"), + }, + }, + "apache": []pathDeductionFixture{ + { + in: "git.apache.org/package-name.git", + root: "git.apache.org/package-name.git", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://git.apache.org/package-name.git")}, + maybeGitSource{url: mkurl("ssh://git.apache.org/package-name.git")}, + maybeGitSource{url: mkurl("git://git.apache.org/package-name.git")}, + maybeGitSource{url: mkurl("http://git.apache.org/package-name.git")}, + }, + }, + { + in: "git.apache.org/package-name.git/foo/bar", + root: "git.apache.org/package-name.git", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://git.apache.org/package-name.git")}, + maybeGitSource{url: mkurl("ssh://git.apache.org/package-name.git")}, + maybeGitSource{url: 
mkurl("git://git.apache.org/package-name.git")}, + maybeGitSource{url: mkurl("http://git.apache.org/package-name.git")}, + }, + }, + }, + "vcsext": []pathDeductionFixture{ + // VCS extension-based syntax + { + in: "foobar.com/baz.git", + root: "foobar.com/baz.git", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("ssh://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("git://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("http://foobar.com/baz.git")}, + }, + }, + { + in: "foobar.com/baz.git/extra/path", + root: "foobar.com/baz.git", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("ssh://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("git://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("http://foobar.com/baz.git")}, + }, + }, + { + in: "foobar.com/baz.bzr", + root: "foobar.com/baz.bzr", + mb: maybeSources{ + maybeBzrSource{url: mkurl("https://foobar.com/baz.bzr")}, + maybeBzrSource{url: mkurl("bzr+ssh://foobar.com/baz.bzr")}, + maybeBzrSource{url: mkurl("bzr://foobar.com/baz.bzr")}, + maybeBzrSource{url: mkurl("http://foobar.com/baz.bzr")}, + }, + }, + { + in: "foo-bar.com/baz.hg", + root: "foo-bar.com/baz.hg", + mb: maybeSources{ + maybeHgSource{url: mkurl("https://foo-bar.com/baz.hg")}, + maybeHgSource{url: mkurl("ssh://foo-bar.com/baz.hg")}, + maybeHgSource{url: mkurl("http://foo-bar.com/baz.hg")}, + }, + }, + { + in: "git@foobar.com:baz.git", + root: "foobar.com/baz.git", + mb: maybeGitSource{url: mkurl("ssh://git@foobar.com/baz.git")}, + }, + { + in: "bzr+ssh://foobar.com/baz.bzr", + root: "foobar.com/baz.bzr", + mb: maybeBzrSource{url: mkurl("bzr+ssh://foobar.com/baz.bzr")}, + }, + { + in: "ssh://foobar.com/baz.bzr", + root: "foobar.com/baz.bzr", + mb: maybeBzrSource{url: mkurl("ssh://foobar.com/baz.bzr")}, + }, + { + in: "https://foobar.com/baz.hg", + root: "foobar.com/baz.hg", + mb: maybeHgSource{url: 
mkurl("https://foobar.com/baz.hg")}, + }, + { + in: "git://foobar.com/baz.hg", + root: "foobar.com/baz.hg", + srcerr: errors.New("git is not a valid scheme for accessing hg repositories (path foobar.com/baz.hg)"), + }, + // who knows why anyone would do this, but having a second vcs ext + // shouldn't throw us off - only the first one counts + { + in: "foobar.com/baz.git/quark/quizzle.bzr/quorum", + root: "foobar.com/baz.git", + mb: maybeSources{ + maybeGitSource{url: mkurl("https://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("ssh://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("git://foobar.com/baz.git")}, + maybeGitSource{url: mkurl("http://foobar.com/baz.git")}, + }, + }, + }, + "vanity": []pathDeductionFixture{ + // Vanity imports + { + in: "golang.org/x/exp", + root: "golang.org/x/exp", + mb: maybeGitSource{url: mkurl("https://go.googlesource.com/exp")}, + }, + { + in: "golang.org/x/exp/inotify", + root: "golang.org/x/exp", + mb: maybeGitSource{url: mkurl("https://go.googlesource.com/exp")}, + }, + { + in: "rsc.io/pdf", + root: "rsc.io/pdf", + mb: maybeGitSource{url: mkurl("https://github.com/rsc/pdf")}, + }, + }, +} + +func TestDeduceFromPath(t *testing.T) { + for typ, fixtures := range pathDeductionFixtures { + var deducer pathDeducer + switch typ { + case "github": + deducer = githubDeducer{regexp: ghRegex} + case "gopkg.in": + deducer = gopkginDeducer{regexp: gpinNewRegex} + case "jazz": + deducer = jazzDeducer{regexp: jazzRegex} + case "bitbucket": + deducer = bitbucketDeducer{regexp: bbRegex} + case "launchpad": + deducer = launchpadDeducer{regexp: lpRegex} + case "git.launchpad": + deducer = launchpadGitDeducer{regexp: glpRegex} + case "apache": + deducer = apacheDeducer{regexp: apacheRegex} + case "vcsext": + deducer = vcsExtensionDeducer{regexp: vcsExtensionRegex} + default: + // Should just be the vanity imports, which we do elsewhere + continue + } + + var printmb func(mb maybeSource) string + printmb = func(mb maybeSource) string { + 
switch tmb := mb.(type) { + case maybeSources: + var buf bytes.Buffer + fmt.Fprintf(&buf, "%v maybeSources:", len(tmb)) + for _, elem := range tmb { + fmt.Fprintf(&buf, "\n\t\t%s", printmb(elem)) + } + return buf.String() + case maybeGitSource: + return fmt.Sprintf("%T: %s", tmb, ufmt(tmb.url)) + case maybeBzrSource: + return fmt.Sprintf("%T: %s", tmb, ufmt(tmb.url)) + case maybeHgSource: + return fmt.Sprintf("%T: %s", tmb, ufmt(tmb.url)) + default: + t.Errorf("Unknown maybeSource type: %T", mb) + t.FailNow() + } + return "" + } + + for _, fix := range fixtures { + u, in, uerr := normalizeURI(fix.in) + if uerr != nil { + if fix.rerr == nil { + t.Errorf("(in: %s) bad input URI %s", fix.in, uerr) + } + continue + } + + root, rerr := deducer.deduceRoot(in) + if fix.rerr != nil { + if rerr == nil { + t.Errorf("(in: %s, %T) Expected error on deducing root, got none:\n\t(WNT) %s", in, deducer, fix.rerr) + } else if fix.rerr.Error() != rerr.Error() { + t.Errorf("(in: %s, %T) Got unexpected error on deducing root:\n\t(GOT) %s\n\t(WNT) %s", in, deducer, rerr, fix.rerr) + } + } else if rerr != nil { + t.Errorf("(in: %s, %T) Got unexpected error on deducing root:\n\t(GOT) %s", in, deducer, rerr) + } else if root != fix.root { + t.Errorf("(in: %s, %T) Deducer did not return expected root:\n\t(GOT) %s\n\t(WNT) %s", in, deducer, root, fix.root) + } + + mb, mberr := deducer.deduceSource(in, u) + if fix.srcerr != nil { + if mberr == nil { + t.Errorf("(in: %s, %T) Expected error on deducing source, got none:\n\t(WNT) %s", in, deducer, fix.srcerr) + } else if fix.srcerr.Error() != mberr.Error() { + t.Errorf("(in: %s, %T) Got unexpected error on deducing source:\n\t(GOT) %s\n\t(WNT) %s", in, deducer, mberr, fix.srcerr) + } + } else if mberr != nil { + // don't complain the fix already expected an rerr + if fix.rerr == nil { + t.Errorf("(in: %s, %T) Got unexpected error on deducing source:\n\t(GOT) %s", in, deducer, mberr) + } + } else if !reflect.DeepEqual(mb, fix.mb) { + if mb == 
nil { + t.Errorf("(in: %s, %T) Deducer returned source maybes, but none expected:\n\t(GOT) (none)\n\t(WNT) %s", in, deducer, printmb(fix.mb)) + } else if fix.mb == nil { + t.Errorf("(in: %s, %T) Deducer returned source maybes, but none expected:\n\t(GOT) %s\n\t(WNT) (none)", in, deducer, printmb(mb)) + } else { + t.Errorf("(in: %s, %T) Deducer did not return expected source:\n\t(GOT) %s\n\t(WNT) %s", in, deducer, printmb(mb), printmb(fix.mb)) + } + } + } + } +} + +func TestVanityDeduction(t *testing.T) { + if testing.Short() { + t.Skip("Skipping slow test in short mode") + } + + sm, clean := mkNaiveSM(t) + defer clean() + + vanities := pathDeductionFixtures["vanity"] + wg := &sync.WaitGroup{} + wg.Add(len(vanities)) + + for _, fix := range vanities { + go func(fix pathDeductionFixture) { + defer wg.Done() + pr, err := sm.DeduceProjectRoot(fix.in) + if err != nil { + t.Errorf("(in: %s) Unexpected err on deducing project root: %s", fix.in, err) + return + } else if string(pr) != fix.root { + t.Errorf("(in: %s) Deducer did not return expected root:\n\t(GOT) %s\n\t(WNT) %s", fix.in, pr, fix.root) + } + + _, srcf, err := sm.deducePathAndProcess(fix.in) + if err != nil { + t.Errorf("(in: %s) Unexpected err on deducing source: %s", fix.in, err) + return + } + + _, ident, err := srcf() + if err != nil { + t.Errorf("(in: %s) Unexpected err on executing source future: %s", fix.in, err) + return + } + + ustr := fix.mb.(maybeGitSource).url.String() + if ident != ustr { + t.Errorf("(in: %s) Deduced repo ident does not match fixture:\n\t(GOT) %s\n\t(WNT) %s", fix.in, ident, ustr) + } + }(fix) + } + + wg.Wait() +} + +// borrow from stdlib +// more useful string for debugging than fmt's struct printer +func ufmt(u *url.URL) string { + var user, pass interface{} + if u.User != nil { + user = u.User.Username() + if p, ok := u.User.Password(); ok { + pass = p + } + } + return fmt.Sprintf("host=%q, path=%q, opaque=%q, scheme=%q, user=%#v, pass=%#v, rawpath=%q, rawq=%q, frag=%q", + 
u.Host, u.Path, u.Opaque, u.Scheme, user, pass, u.RawPath, u.RawQuery, u.Fragment) +} diff --git a/flags.go b/flags.go index a7172c1..d9a3a1d 100644 --- a/flags.go +++ b/flags.go @@ -1,7 +1,7 @@ package gps -// projectExistence values represent the extent to which a project "exists." -type projectExistence uint8 +// sourceExistence values represent the extent to which a project "exists." +type sourceExistence uint8 const ( // ExistsInVendorRoot indicates that a project exists in a vendor directory @@ -19,7 +19,7 @@ const ( // // In short, the information encoded in this flag should not be construed as // exhaustive. - existsInVendorRoot projectExistence = 1 << iota + existsInVendorRoot sourceExistence = 1 << iota // ExistsInCache indicates that a project exists on-disk in the local cache. // It does not guarantee that an upstream exists, thus it cannot imply diff --git a/hash.go b/hash.go index e336aaf..893c34e 100644 --- a/hash.go +++ b/hash.go @@ -20,7 +20,7 @@ func (s *solver) HashInputs() ([]byte, error) { // Do these checks up front before any other work is needed, as they're the // only things that can cause errors // Pass in magic root values, and the bridge will analyze the right thing - ptree, err := s.b.listPackages(ProjectIdentifier{ProjectRoot: s.params.ImportRoot}, nil) + ptree, err := s.b.ListPackages(ProjectIdentifier{ProjectRoot: s.params.ImportRoot}, nil) if err != nil { return nil, badOptsFailure(fmt.Sprintf("Error while parsing packages under %s: %s", s.params.RootDir, err.Error())) } @@ -93,7 +93,7 @@ func (s *solver) HashInputs() ([]byte, error) { } } - an, av := s.b.analyzerInfo() + an, av := s.b.AnalyzerInfo() h.Write([]byte(an)) h.Write([]byte(av.String())) diff --git a/manager_test.go b/manager_test.go index ae65ef4..439d8b4 100644 --- a/manager_test.go +++ b/manager_test.go @@ -5,8 +5,10 @@ import ( "io/ioutil" "os" "path" + "path/filepath" "runtime" "sort" + "sync" "testing" "github.com/Masterminds/semver" @@ -36,6 +38,28 @@ func sv(s 
string) *semver.Version { return sv } +func mkNaiveSM(t *testing.T) (*SourceMgr, func()) { + cpath, err := ioutil.TempDir("", "smcache") + if err != nil { + t.Errorf("Failed to create temp dir: %s", err) + t.FailNow() + } + + sm, err := NewSourceManager(naiveAnalyzer{}, cpath, false) + if err != nil { + t.Errorf("Unexpected error on SourceManager creation: %s", err) + t.FailNow() + } + + return sm, func() { + sm.Release() + err := removeAll(cpath) + if err != nil { + t.Errorf("removeAll failed: %s", err) + } + } +} + func init() { _, filename, _, _ := runtime.Caller(1) bd = path.Dir(filename) @@ -83,23 +107,25 @@ func TestProjectManagerInit(t *testing.T) { cpath, err := ioutil.TempDir("", "smcache") if err != nil { t.Errorf("Failed to create temp dir: %s", err) + t.FailNow() } - sm, err := NewSourceManager(naiveAnalyzer{}, cpath, false) + sm, err := NewSourceManager(naiveAnalyzer{}, cpath, false) if err != nil { t.Errorf("Unexpected error on SourceManager creation: %s", err) t.FailNow() } + defer func() { + sm.Release() err := removeAll(cpath) if err != nil { t.Errorf("removeAll failed: %s", err) } }() - defer sm.Release() - pn := ProjectRoot("github.com/Masterminds/VCSTestRepo") - v, err := sm.ListVersions(pn) + id := mkPI("github.com/Masterminds/VCSTestRepo") + v, err := sm.ListVersions(id) if err != nil { t.Errorf("Unexpected error during initial project setup/fetching %s", err) } @@ -126,15 +152,15 @@ func TestProjectManagerInit(t *testing.T) { } // Two birds, one stone - make sure the internal ProjectManager vlist cache - // works by asking for the versions again, and do it through smcache to - // ensure its sorting works, as well. + // works (or at least doesn't not work) by asking for the versions again, + // and do it through smcache to ensure its sorting works, as well. 
smc := &bridge{ sm: sm, - vlists: make(map[ProjectRoot][]Version), + vlists: make(map[ProjectIdentifier][]Version), s: &solver{}, } - v, err = smc.listVersions(ProjectIdentifier{ProjectRoot: pn}) + v, err = smc.ListVersions(id) if err != nil { t.Errorf("Unexpected error during initial project setup/fetching %s", err) } @@ -156,191 +182,274 @@ func TestProjectManagerInit(t *testing.T) { } } + // use ListPackages to ensure the repo is actually on disk + // TODO(sdboyer) ugh, maybe we do need an explicit prefetch method + smc.ListPackages(id, NewVersion("1.0.0")) + // Ensure that the appropriate cache dirs and files exist - _, err = os.Stat(path.Join(cpath, "src", "github.com", "Masterminds", "VCSTestRepo", ".git")) + _, err = os.Stat(filepath.Join(cpath, "sources", "https---github.com-Masterminds-VCSTestRepo", ".git")) if err != nil { t.Error("Cache repo does not exist in expected location") } - _, err = os.Stat(path.Join(cpath, "metadata", "github.com", "Masterminds", "VCSTestRepo", "cache.json")) + _, err = os.Stat(filepath.Join(cpath, "metadata", "github.com", "Masterminds", "VCSTestRepo", "cache.json")) if err != nil { - // TODO(sdboyer) temporarily disabled until we turn caching back on + // TODO(sdboyer) disabled until we get caching working //t.Error("Metadata cache json file does not exist in expected location") } - // Ensure project existence values are what we expect + // Ensure source existence values are what we expect var exists bool - exists, err = sm.RepoExists(pn) + exists, err = sm.SourceExists(id) if err != nil { - t.Errorf("Error on checking RepoExists: %s", err) + t.Errorf("Error on checking SourceExists: %s", err) } if !exists { - t.Error("Repo should exist after non-erroring call to ListVersions") + t.Error("Source should exist after non-erroring call to ListVersions") } +} - // Now reach inside the black box - pms, err := sm.getProjectManager(pn) - if err != nil { - t.Errorf("Error on grabbing project manager obj: %s", err) +func 
TestGetSources(t *testing.T) { + // This test is a tad slow, skip it on -short + if testing.Short() { + t.Skip("Skipping source setup test in short mode") } - // Check upstream existence flag - if !pms.pm.CheckExistence(existsUpstream) { - t.Errorf("ExistsUpstream flag not being correctly set the project") + sm, clean := mkNaiveSM(t) + + pil := []ProjectIdentifier{ + mkPI("github.com/Masterminds/VCSTestRepo"), + mkPI("bitbucket.org/mattfarina/testhgrepo"), + mkPI("launchpad.net/govcstestbzrrepo"), } + + wg := &sync.WaitGroup{} + wg.Add(3) + for _, pi := range pil { + go func(lpi ProjectIdentifier) { + nn := lpi.netName() + src, err := sm.getSourceFor(lpi) + if err != nil { + t.Errorf("(src %q) unexpected error setting up source: %s", nn, err) + return + } + + // Re-get the same, make sure they are the same + src2, err := sm.getSourceFor(lpi) + if err != nil { + t.Errorf("(src %q) unexpected error re-getting source: %s", nn, err) + } else if src != src2 { + t.Errorf("(src %q) first and second sources are not eq", nn) + } + + // All of them _should_ select https, so this should work + lpi.NetworkName = "https://" + lpi.NetworkName + src3, err := sm.getSourceFor(lpi) + if err != nil { + t.Errorf("(src %q) unexpected error getting explicit https source: %s", nn, err) + } else if src != src3 { + t.Errorf("(src %q) explicit https source should reuse autodetected https source", nn) + } + + // Now put in http, and they should differ + lpi.NetworkName = "http://" + string(lpi.ProjectRoot) + src4, err := sm.getSourceFor(lpi) + if err != nil { + t.Errorf("(src %q) unexpected error getting explicit http source: %s", nn, err) + } else if src == src4 { + t.Errorf("(src %q) explicit http source should create a new src", nn) + } + + wg.Done() + }(pi) + } + + wg.Wait() + + // nine entries (of which three are dupes): for each vcs, raw import path, + // the https url, and the http url + if len(sm.srcs) != 9 { + t.Errorf("Should have nine discrete entries in the srcs map, got %v", 
len(sm.srcs)) + } + clean() } -func TestRepoVersionFetching(t *testing.T) { +// Regression test for #32 +func TestGetInfoListVersionsOrdering(t *testing.T) { // This test is quite slow, skip it on -short if testing.Short() { - t.Skip("Skipping repo version fetching test in short mode") + t.Skip("Skipping slow test in short mode") } - cpath, err := ioutil.TempDir("", "smcache") - if err != nil { - t.Errorf("Failed to create temp dir: %s", err) - } + sm, clean := mkNaiveSM(t) + defer clean() - sm, err := NewSourceManager(naiveAnalyzer{}, cpath, false) + // setup done, now do the test + + id := mkPI("github.com/Masterminds/VCSTestRepo") + + _, _, err := sm.GetManifestAndLock(id, NewVersion("1.0.0")) if err != nil { - t.Errorf("Unexpected error on SourceManager creation: %s", err) - t.FailNow() + t.Errorf("Unexpected error from GetInfoAt %s", err) } - upstreams := []ProjectRoot{ - "github.com/Masterminds/VCSTestRepo", - "bitbucket.org/mattfarina/testhgrepo", - "launchpad.net/govcstestbzrrepo", + v, err := sm.ListVersions(id) + if err != nil { + t.Errorf("Unexpected error from ListVersions %s", err) } - pms := make([]*projectManager, len(upstreams)) - for k, u := range upstreams { - pmi, err := sm.getProjectManager(u) - if err != nil { - sm.Release() - removeAll(cpath) - t.Errorf("Unexpected error on ProjectManager creation: %s", err) - t.FailNow() - } - pms[k] = pmi.pm + if len(v) != 3 { + t.Errorf("Expected three results from ListVersions, got %v", len(v)) } +} - defer func() { - err := removeAll(cpath) - if err != nil { - t.Errorf("removeAll failed: %s", err) - } - }() - defer sm.Release() +func TestDeduceProjectRoot(t *testing.T) { + sm, clean := mkNaiveSM(t) + defer clean() - // test git first - vlist, exbits, err := pms[0].crepo.getCurrentVersionPairs() + in := "github.com/sdboyer/gps" + pr, err := sm.DeduceProjectRoot(in) if err != nil { - t.Errorf("Unexpected error getting version pairs from git repo: %s", err) + t.Errorf("Problem while detecting root of %q %s", 
in, err) } - if exbits != existsUpstream { - t.Errorf("git pair fetch should only set upstream existence bits, but got %v", exbits) + if string(pr) != in { + t.Errorf("Wrong project root was deduced;\n\t(GOT) %s\n\t(WNT) %s", pr, in) } - if len(vlist) != 3 { - t.Errorf("git test repo should've produced three versions, got %v", len(vlist)) - } else { - v := NewBranch("master").Is(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")) - if vlist[0] != v { - t.Errorf("git pair fetch reported incorrect first version, got %s", vlist[0]) - } - - v = NewBranch("test").Is(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")) - if vlist[1] != v { - t.Errorf("git pair fetch reported incorrect second version, got %s", vlist[1]) - } - - v = NewVersion("1.0.0").Is(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")) - if vlist[2] != v { - t.Errorf("git pair fetch reported incorrect third version, got %s", vlist[2]) - } + if sm.rootxt.Len() != 1 { + t.Errorf("Root path trie should have one element after one deduction, has %v", sm.rootxt.Len()) } - // now hg - vlist, exbits, err = pms[1].crepo.getCurrentVersionPairs() + pr, err = sm.DeduceProjectRoot(in) if err != nil { - t.Errorf("Unexpected error getting version pairs from hg repo: %s", err) + t.Errorf("Problem while detecting root of %q %s", in, err) + } else if string(pr) != in { + t.Errorf("Wrong project root was deduced;\n\t(GOT) %s\n\t(WNT) %s", pr, in) } - if exbits != existsUpstream|existsInCache { - t.Errorf("hg pair fetch should set upstream and cache existence bits, but got %v", exbits) + if sm.rootxt.Len() != 1 { + t.Errorf("Root path trie should still have one element after performing the same deduction twice; has %v", sm.rootxt.Len()) } - if len(vlist) != 2 { - t.Errorf("hg test repo should've produced two versions, got %v", len(vlist)) - } else { - v := NewVersion("1.0.0").Is(Revision("d680e82228d206935ab2eaa88612587abe68db07")) - if vlist[0] != v { - t.Errorf("hg pair fetch reported incorrect first version, got 
%s", vlist[0]) - } - v = NewBranch("test").Is(Revision("6c44ee3fe5d87763616c19bf7dbcadb24ff5a5ce")) - if vlist[1] != v { - t.Errorf("hg pair fetch reported incorrect second version, got %s", vlist[1]) - } + // Now do a subpath + sub := path.Join(in, "foo") + pr, err = sm.DeduceProjectRoot(sub) + if err != nil { + t.Errorf("Problem while detecting root of %q %s", sub, err) + } else if string(pr) != in { + t.Errorf("Wrong project root was deduced;\n\t(GOT) %s\n\t(WNT) %s", pr, in) + } + if sm.rootxt.Len() != 2 { + t.Errorf("Root path trie should have two elements, one for root and one for subpath; has %v", sm.rootxt.Len()) } - // bzr last - vlist, exbits, err = pms[2].crepo.getCurrentVersionPairs() + // Now do a fully different root, but still on github + in2 := "github.com/bagel/lox" + sub2 := path.Join(in2, "cheese") + pr, err = sm.DeduceProjectRoot(sub2) if err != nil { - t.Errorf("Unexpected error getting version pairs from bzr repo: %s", err) - } - if exbits != existsUpstream|existsInCache { - t.Errorf("bzr pair fetch should set upstream and cache existence bits, but got %v", exbits) + t.Errorf("Problem while detecting root of %q %s", sub2, err) + } else if string(pr) != in2 { + t.Errorf("Wrong project root was deduced;\n\t(GOT) %s\n\t(WNT) %s", pr, in) } - if len(vlist) != 1 { - t.Errorf("bzr test repo should've produced one version, got %v", len(vlist)) - } else { - v := NewVersion("1.0.0").Is(Revision("matt@mattfarina.com-20150731135137-pbphasfppmygpl68")) - if vlist[0] != v { - t.Errorf("bzr pair fetch reported incorrect first version, got %s", vlist[0]) - } + if sm.rootxt.Len() != 4 { + t.Errorf("Root path trie should have four elements, one for each unique root and subpath; has %v", sm.rootxt.Len()) } - // no svn for now, because...svn -} -// Regression test for #32 -func TestGetInfoListVersionsOrdering(t *testing.T) { - // This test is quite slow, skip it on -short - if testing.Short() { - t.Skip("Skipping slow test in short mode") + // Ensure that our 
prefixes are bounded by path separators + in4 := "github.com/bagel/loxx" + pr, err = sm.DeduceProjectRoot(in4) + if err != nil { + t.Errorf("Problem while detecting root of %q %s", in4, err) + } else if string(pr) != in4 { + t.Errorf("Wrong project root was deduced;\n\t(GOT) %s\n\t(WNT) %s", pr, in) + } + if sm.rootxt.Len() != 5 { + t.Errorf("Root path trie should have five elements, one for each unique root and subpath; has %v", sm.rootxt.Len()) } - cpath, err := ioutil.TempDir("", "smcache") + // Ensure that vcs extension-based matching comes through + in5 := "ffffrrrraaaaaapppppdoesnotresolve.com/baz.git" + pr, err = sm.DeduceProjectRoot(in5) if err != nil { - t.Errorf("Failed to create temp dir: %s", err) + t.Errorf("Problem while detecting root of %q %s", in5, err) + } else if string(pr) != in5 { + t.Errorf("Wrong project root was deduced;\n\t(GOT) %s\n\t(WNT) %s", pr, in) } - sm, err := NewSourceManager(naiveAnalyzer{}, cpath, false) + if sm.rootxt.Len() != 6 { + t.Errorf("Root path trie should have six elements, one for each unique root and subpath; has %v", sm.rootxt.Len()) + } +} +// Test that the future returned from SourceMgr.deducePathAndProcess() is safe +// to call concurrently. 
+// +// Obviously, this is just a heuristic; passage does not guarantee correctness +// (though failure does guarantee incorrectness) +func TestMultiDeduceThreadsafe(t *testing.T) { + sm, clean := mkNaiveSM(t) + defer clean() + + in := "github.com/sdboyer/gps" + rootf, srcf, err := sm.deducePathAndProcess(in) if err != nil { - t.Errorf("Unexpected error on SourceManager creation: %s", err) + t.Errorf("Known-good path %q had unexpected basic deduction error: %s", in, err) t.FailNow() } - defer func() { - err := removeAll(cpath) - if err != nil { - t.Errorf("removeAll failed: %s", err) - } - }() - defer sm.Release() - // setup done, now do the test + cnum := 50 + wg := &sync.WaitGroup{} - pn := ProjectRoot("github.com/Masterminds/VCSTestRepo") + // Set up channel for everything else to block on + c := make(chan struct{}, 1) + f := func(rnum int) { + defer func() { + wg.Done() + if e := recover(); e != nil { + t.Errorf("goroutine number %v panicked with err: %s", rnum, e) + } + }() + <-c + _, err := rootf() + if err != nil { + t.Errorf("err was non-nil on root detection in goroutine number %v: %s", rnum, err) + } + } - _, _, err = sm.GetManifestAndLock(pn, NewVersion("1.0.0")) - if err != nil { - t.Errorf("Unexpected error from GetInfoAt %s", err) + for k := range make([]struct{}, cnum) { + wg.Add(1) + go f(k) + runtime.Gosched() + } + close(c) + wg.Wait() + if sm.rootxt.Len() != 1 { + t.Errorf("Root path trie should have just one element; has %v", sm.rootxt.Len()) } - v, err := sm.ListVersions(pn) - if err != nil { - t.Errorf("Unexpected error from ListVersions %s", err) + // repeat for srcf + wg2 := &sync.WaitGroup{} + c = make(chan struct{}, 1) + f = func(rnum int) { + defer func() { + wg2.Done() + if e := recover(); e != nil { + t.Errorf("goroutine number %v panicked with err: %s", rnum, e) + } + }() + <-c + _, _, err := srcf() + if err != nil { + t.Errorf("err was non-nil on root detection in goroutine number %v: %s", rnum, err) + } } - if len(v) != 3 { - 
t.Errorf("Expected three results from ListVersions, got %v", len(v)) + for k := range make([]struct{}, cnum) { + wg2.Add(1) + go f(k) + runtime.Gosched() + } + close(c) + wg2.Wait() + if len(sm.srcs) != 2 { + t.Errorf("Sources map should have just two elements, but has %v", len(sm.srcs)) } } diff --git a/maybe_source.go b/maybe_source.go new file mode 100644 index 0000000..34fd5d5 --- /dev/null +++ b/maybe_source.go @@ -0,0 +1,153 @@ +package gps + +import ( + "bytes" + "fmt" + "net/url" + "path/filepath" + + "github.com/Masterminds/vcs" +) + +type maybeSource interface { + try(cachedir string, an ProjectAnalyzer) (source, string, error) +} + +type maybeSources []maybeSource + +func (mbs maybeSources) try(cachedir string, an ProjectAnalyzer) (source, string, error) { + var e sourceFailures + for _, mb := range mbs { + src, ident, err := mb.try(cachedir, an) + if err == nil { + return src, ident, nil + } + e = append(e, sourceSetupFailure{ + ident: ident, + err: err, + }) + } + return nil, "", e +} + +type sourceSetupFailure struct { + ident string + err error +} + +func (e sourceSetupFailure) Error() string { + return fmt.Sprintf("failed to set up %q, error %s", e.ident, e.err.Error()) +} + +type sourceFailures []sourceSetupFailure + +func (sf sourceFailures) Error() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "No valid source could be created:\n") + for _, e := range sf { + fmt.Fprintf(&buf, "\t%s", e.Error()) + } + + return buf.String() +} + +type maybeGitSource struct { + url *url.URL +} + +func (m maybeGitSource) try(cachedir string, an ProjectAnalyzer) (source, string, error) { + ustr := m.url.String() + path := filepath.Join(cachedir, "sources", sanitizer.Replace(ustr)) + r, err := vcs.NewGitRepo(ustr, path) + if err != nil { + return nil, "", err + } + + src := &gitSource{ + baseVCSSource: baseVCSSource{ + an: an, + dc: newMetaCache(), + crepo: &repo{ + r: r, + rpath: path, + }, + }, + } + + src.baseVCSSource.lvfunc = src.listVersions + + _, err = 
src.listVersions() + if err != nil { + return nil, "", err + } + + return src, ustr, nil +} + +type maybeBzrSource struct { + url *url.URL +} + +func (m maybeBzrSource) try(cachedir string, an ProjectAnalyzer) (source, string, error) { + ustr := m.url.String() + path := filepath.Join(cachedir, "sources", sanitizer.Replace(ustr)) + r, err := vcs.NewBzrRepo(ustr, path) + if err != nil { + return nil, "", err + } + if !r.Ping() { + return nil, "", fmt.Errorf("Remote repository at %s does not exist, or is inaccessible", ustr) + } + + src := &bzrSource{ + baseVCSSource: baseVCSSource{ + an: an, + dc: newMetaCache(), + ex: existence{ + s: existsUpstream, + f: existsUpstream, + }, + crepo: &repo{ + r: r, + rpath: path, + }, + }, + } + src.baseVCSSource.lvfunc = src.listVersions + + return src, ustr, nil +} + +type maybeHgSource struct { + url *url.URL +} + +func (m maybeHgSource) try(cachedir string, an ProjectAnalyzer) (source, string, error) { + ustr := m.url.String() + path := filepath.Join(cachedir, "sources", sanitizer.Replace(ustr)) + r, err := vcs.NewHgRepo(ustr, path) + if err != nil { + return nil, "", err + } + if !r.Ping() { + return nil, "", fmt.Errorf("Remote repository at %s does not exist, or is inaccessible", ustr) + } + + src := &hgSource{ + baseVCSSource: baseVCSSource{ + an: an, + dc: newMetaCache(), + ex: existence{ + s: existsUpstream, + f: existsUpstream, + }, + crepo: &repo{ + r: r, + rpath: path, + }, + }, + } + src.baseVCSSource.lvfunc = src.listVersions + + return src, ustr, nil +} diff --git a/project_manager.go b/project_manager.go deleted file mode 100644 index 6587a0c..0000000 --- a/project_manager.go +++ /dev/null @@ -1,584 +0,0 @@ -package gps - -import ( - "bytes" - "fmt" - "go/build" - "os" - "os/exec" - "path" - "path/filepath" - "strings" - "sync" - - "github.com/Masterminds/vcs" - "github.com/termie/go-shutil" -) - -type projectManager struct { - // The identifier of the project. 
At this level, corresponds to the - // '$GOPATH/src'-relative path, *and* the network name. - n ProjectRoot - - // build.Context to use in any analysis, and to pass to the analyzer - ctx build.Context - - // Object for the cache repository - crepo *repo - - // Indicates the extent to which we have searched for, and verified, the - // existence of the project/repo. - ex existence - - // Analyzer, injected by way of the SourceManager and originally from the - // sm's creator - an ProjectAnalyzer - - // Whether the cache has the latest info on versions - cvsync bool - - // The project metadata cache. This is persisted to disk, for reuse across - // solver runs. - // TODO(sdboyer) protect with mutex - dc *projectDataCache -} - -type existence struct { - // The existence levels for which a search/check has been performed - s projectExistence - - // The existence levels verified to be present through searching - f projectExistence -} - -// TODO(sdboyer) figure out shape of versions, then implement marshaling/unmarshaling -type projectDataCache struct { - Version string `json:"version"` // TODO(sdboyer) use this - Infos map[Revision]projectInfo `json:"infos"` - Packages map[Revision]PackageTree `json:"packages"` - VMap map[Version]Revision `json:"vmap"` - RMap map[Revision][]Version `json:"rmap"` -} - -// projectInfo holds manifest and lock -type projectInfo struct { - Manifest - Lock -} - -type repo struct { - // Path to the root of the default working copy (NOT the repo itself) - rpath string - - // Mutex controlling general access to the repo - mut sync.RWMutex - - // Object for direct repo interaction - r vcs.Repo - - // Whether or not the cache repo is in sync (think dvcs) with upstream - synced bool -} - -func (pm *projectManager) GetInfoAt(v Version) (Manifest, Lock, error) { - if err := pm.ensureCacheExistence(); err != nil { - return nil, nil, err - } - - if r, exists := pm.dc.VMap[v]; exists { - if pi, exists := pm.dc.Infos[r]; exists { - return pi.Manifest, 
pi.Lock, nil - } - } - - pm.crepo.mut.Lock() - var err error - if !pm.crepo.synced { - err = pm.crepo.r.Update() - if err != nil { - return nil, nil, fmt.Errorf("Could not fetch latest updates into repository") - } - pm.crepo.synced = true - } - - // Always prefer a rev, if it's available - if pv, ok := v.(PairedVersion); ok { - err = pm.crepo.r.UpdateVersion(pv.Underlying().String()) - } else { - err = pm.crepo.r.UpdateVersion(v.String()) - } - pm.crepo.mut.Unlock() - if err != nil { - // TODO(sdboyer) More-er proper-er error - panic(fmt.Sprintf("canary - why is checkout/whatever failing: %s %s %s", pm.n, v.String(), err)) - } - - pm.crepo.mut.RLock() - m, l, err := pm.an.DeriveManifestAndLock(filepath.Join(pm.ctx.GOPATH, "src", string(pm.n)), pm.n) - // TODO(sdboyer) cache results - pm.crepo.mut.RUnlock() - - if err == nil { - if l != nil { - l = prepLock(l) - } - - // If m is nil, prepManifest will provide an empty one. - pi := projectInfo{ - Manifest: prepManifest(m), - Lock: l, - } - - // TODO(sdboyer) this just clobbers all over and ignores the paired/unpaired - // distinction; serious fix is needed - if r, exists := pm.dc.VMap[v]; exists { - pm.dc.Infos[r] = pi - } - - return pi.Manifest, pi.Lock, nil - } - - return nil, nil, err -} - -func (pm *projectManager) ListPackages(v Version) (ptree PackageTree, err error) { - if err = pm.ensureCacheExistence(); err != nil { - return - } - - // See if we can find it in the cache - var r Revision - switch v.(type) { - case Revision, PairedVersion: - var ok bool - if r, ok = v.(Revision); !ok { - r = v.(PairedVersion).Underlying() - } - - if ptree, cached := pm.dc.Packages[r]; cached { - return ptree, nil - } - default: - var has bool - if r, has = pm.dc.VMap[v]; has { - if ptree, cached := pm.dc.Packages[r]; cached { - return ptree, nil - } - } - } - - // TODO(sdboyer) handle the case where we have a version w/out rev, and not in cache - - // Not in the cache; check out the version and do the analysis - 
pm.crepo.mut.Lock() - // Check out the desired version for analysis - if r != "" { - // Always prefer a rev, if it's available - err = pm.crepo.r.UpdateVersion(string(r)) - } else { - // If we don't have a rev, ensure the repo is up to date, otherwise we - // could have a desync issue - if !pm.crepo.synced { - err = pm.crepo.r.Update() - if err != nil { - return PackageTree{}, fmt.Errorf("Could not fetch latest updates into repository: %s", err) - } - pm.crepo.synced = true - } - err = pm.crepo.r.UpdateVersion(v.String()) - } - - ptree, err = listPackages(filepath.Join(pm.ctx.GOPATH, "src", string(pm.n)), string(pm.n)) - pm.crepo.mut.Unlock() - - // TODO(sdboyer) cache errs? - if err != nil { - pm.dc.Packages[r] = ptree - } - - return -} - -func (pm *projectManager) ensureCacheExistence() error { - // Technically, methods could could attempt to return straight from the - // metadata cache even if the repo cache doesn't exist on disk. But that - // would allow weird state inconsistencies (cache exists, but no repo...how - // does that even happen?) 
that it'd be better to just not allow so that we - // don't have to think about it elsewhere - if !pm.CheckExistence(existsInCache) { - if pm.CheckExistence(existsUpstream) { - pm.crepo.mut.Lock() - err := pm.crepo.r.Get() - pm.crepo.mut.Unlock() - - if err != nil { - return fmt.Errorf("failed to create repository cache for %s", pm.n) - } - pm.ex.s |= existsInCache - pm.ex.f |= existsInCache - } else { - return fmt.Errorf("project %s does not exist upstream", pm.n) - } - } - - return nil -} - -func (pm *projectManager) ListVersions() (vlist []Version, err error) { - if !pm.cvsync { - // This check only guarantees that the upstream exists, not the cache - pm.ex.s |= existsUpstream - vpairs, exbits, err := pm.crepo.getCurrentVersionPairs() - // But it *may* also check the local existence - pm.ex.s |= exbits - pm.ex.f |= exbits - - if err != nil { - // TODO(sdboyer) More-er proper-er error - fmt.Println(err) - return nil, err - } - - vlist = make([]Version, len(vpairs)) - // mark our cache as synced if we got ExistsUpstream back - if exbits&existsUpstream == existsUpstream { - pm.cvsync = true - } - - // Process the version data into the cache - // TODO(sdboyer) detect out-of-sync data as we do this? - for k, v := range vpairs { - pm.dc.VMap[v] = v.Underlying() - pm.dc.RMap[v.Underlying()] = append(pm.dc.RMap[v.Underlying()], v) - vlist[k] = v - } - } else { - vlist = make([]Version, len(pm.dc.VMap)) - k := 0 - // TODO(sdboyer) key type of VMap should be string; recombine here - //for v, r := range pm.dc.VMap { - for v := range pm.dc.VMap { - vlist[k] = v - k++ - } - } - - return -} - -func (pm *projectManager) RevisionPresentIn(r Revision) (bool, error) { - // First and fastest path is to check the data cache to see if the rev is - // present. This could give us false positives, but the cases where that can - // occur would require a type of cache staleness that seems *exceedingly* - // unlikely to occur. 
- if _, has := pm.dc.Infos[r]; has { - return true, nil - } else if _, has := pm.dc.RMap[r]; has { - return true, nil - } - - // For now at least, just run GetInfoAt(); it basically accomplishes the - // same thing. - if _, _, err := pm.GetInfoAt(r); err != nil { - return false, err - } - return true, nil -} - -// CheckExistence provides a direct method for querying existence levels of the -// project. It will only perform actual searching (local fs or over the network) -// if no previous attempt at that search has been made. -// -// Note that this may perform read-ish operations on the cache repo, and it -// takes a lock accordingly. Deadlock may result from calling it during a -// segment where the cache repo mutex is already write-locked. -func (pm *projectManager) CheckExistence(ex projectExistence) bool { - if pm.ex.s&ex != ex { - if ex&existsInVendorRoot != 0 && pm.ex.s&existsInVendorRoot == 0 { - panic("should now be implemented in bridge") - } - if ex&existsInCache != 0 && pm.ex.s&existsInCache == 0 { - pm.crepo.mut.RLock() - pm.ex.s |= existsInCache - if pm.crepo.r.CheckLocal() { - pm.ex.f |= existsInCache - } - pm.crepo.mut.RUnlock() - } - if ex&existsUpstream != 0 && pm.ex.s&existsUpstream == 0 { - pm.crepo.mut.RLock() - pm.ex.s |= existsUpstream - if pm.crepo.r.Ping() { - pm.ex.f |= existsUpstream - } - pm.crepo.mut.RUnlock() - } - } - - return ex&pm.ex.f == ex -} - -func (pm *projectManager) ExportVersionTo(v Version, to string) error { - return pm.crepo.exportVersionTo(v, to) -} - -func (r *repo) getCurrentVersionPairs() (vlist []PairedVersion, exbits projectExistence, err error) { - r.mut.Lock() - defer r.mut.Unlock() - - switch r.r.(type) { - case *vcs.GitRepo: - var out []byte - c := exec.Command("git", "ls-remote", r.r.Remote()) - // Ensure no terminal prompting for PWs - c.Env = mergeEnvLists([]string{"GIT_TERMINAL_PROMPT=0"}, os.Environ()) - out, err = c.CombinedOutput() - - all := bytes.Split(bytes.TrimSpace(out), []byte("\n")) - if err != nil 
|| len(all) == 0 { - // TODO(sdboyer) remove this path? it really just complicates things, for - // probably not much benefit - - // ls-remote failed, probably due to bad communication or a faulty - // upstream implementation. So fetch updates, then build the list - // locally - err = r.r.Update() - if err != nil { - // Definitely have a problem, now - bail out - return - } - - // Upstream and cache must exist, so add that to exbits - exbits |= existsUpstream | existsInCache - // Also, local is definitely now synced - r.synced = true - - out, err = r.r.RunFromDir("git", "show-ref", "--dereference") - if err != nil { - return - } - - all = bytes.Split(bytes.TrimSpace(out), []byte("\n")) - } - // Local cache may not actually exist here, but upstream definitely does - exbits |= existsUpstream - - tmap := make(map[string]PairedVersion) - for _, pair := range all { - var v PairedVersion - if string(pair[46:51]) == "heads" { - v = NewBranch(string(pair[52:])).Is(Revision(pair[:40])).(PairedVersion) - vlist = append(vlist, v) - } else if string(pair[46:50]) == "tags" { - vstr := string(pair[51:]) - if strings.HasSuffix(vstr, "^{}") { - // If the suffix is there, then we *know* this is the rev of - // the underlying commit object that we actually want - vstr = strings.TrimSuffix(vstr, "^{}") - } else if _, exists := tmap[vstr]; exists { - // Already saw the deref'd version of this tag, if one - // exists, so skip this. - continue - // Can only hit this branch if we somehow got the deref'd - // version first. Which should be impossible, but this - // covers us in case of weirdness, anyway. 
- } - v = NewVersion(vstr).Is(Revision(pair[:40])).(PairedVersion) - tmap[vstr] = v - } - } - - // Append all the deref'd (if applicable) tags into the list - for _, v := range tmap { - vlist = append(vlist, v) - } - case *vcs.BzrRepo: - var out []byte - // Update the local first - err = r.r.Update() - if err != nil { - return - } - // Upstream and cache must exist, so add that to exbits - exbits |= existsUpstream | existsInCache - // Also, local is definitely now synced - r.synced = true - - // Now, list all the tags - out, err = r.r.RunFromDir("bzr", "tags", "--show-ids", "-v") - if err != nil { - return - } - - all := bytes.Split(bytes.TrimSpace(out), []byte("\n")) - for _, line := range all { - idx := bytes.IndexByte(line, 32) // space - v := NewVersion(string(line[:idx])).Is(Revision(bytes.TrimSpace(line[idx:]))).(PairedVersion) - vlist = append(vlist, v) - } - - case *vcs.HgRepo: - var out []byte - err = r.r.Update() - if err != nil { - return - } - - // Upstream and cache must exist, so add that to exbits - exbits |= existsUpstream | existsInCache - // Also, local is definitely now synced - r.synced = true - - out, err = r.r.RunFromDir("hg", "tags", "--debug", "--verbose") - if err != nil { - return - } - - all := bytes.Split(bytes.TrimSpace(out), []byte("\n")) - lbyt := []byte("local") - nulrev := []byte("0000000000000000000000000000000000000000") - for _, line := range all { - if bytes.Equal(lbyt, line[len(line)-len(lbyt):]) { - // Skip local tags - continue - } - - // tip is magic, don't include it - if bytes.HasPrefix(line, []byte("tip")) { - continue - } - - // Split on colon; this gets us the rev and the tag plus local revno - pair := bytes.Split(line, []byte(":")) - if bytes.Equal(nulrev, pair[1]) { - // null rev indicates this tag is marked for deletion - continue - } - - idx := bytes.IndexByte(pair[0], 32) // space - v := NewVersion(string(pair[0][:idx])).Is(Revision(pair[1])).(PairedVersion) - vlist = append(vlist, v) - } - - out, err = 
r.r.RunFromDir("hg", "branches", "--debug", "--verbose") - if err != nil { - // better nothing than incomplete - vlist = nil - return - } - - all = bytes.Split(bytes.TrimSpace(out), []byte("\n")) - lbyt = []byte("(inactive)") - for _, line := range all { - if bytes.Equal(lbyt, line[len(line)-len(lbyt):]) { - // Skip inactive branches - continue - } - - // Split on colon; this gets us the rev and the branch plus local revno - pair := bytes.Split(line, []byte(":")) - idx := bytes.IndexByte(pair[0], 32) // space - v := NewBranch(string(pair[0][:idx])).Is(Revision(pair[1])).(PairedVersion) - vlist = append(vlist, v) - } - case *vcs.SvnRepo: - // TODO(sdboyer) is it ok to return empty vlist and no error? - // TODO(sdboyer) ...gotta do something for svn, right? - default: - panic("unknown repo type") - } - - return -} - -func (r *repo) exportVersionTo(v Version, to string) error { - r.mut.Lock() - defer r.mut.Unlock() - - switch r.r.(type) { - case *vcs.GitRepo: - // Back up original index - idx, bak := path.Join(r.rpath, ".git", "index"), path.Join(r.rpath, ".git", "origindex") - err := os.Rename(idx, bak) - if err != nil { - return err - } - - // TODO(sdboyer) could have an err here - defer os.Rename(bak, idx) - - vstr := v.String() - if rv, ok := v.(PairedVersion); ok { - vstr = rv.Underlying().String() - } - _, err = r.r.RunFromDir("git", "read-tree", vstr) - if err != nil { - return err - } - - // Ensure we have exactly one trailing slash - to = strings.TrimSuffix(to, string(os.PathSeparator)) + string(os.PathSeparator) - // Checkout from our temporary index to the desired target location on disk; - // now it's git's job to make it fast. Sadly, this approach *does* also - // write out vendor dirs. 
There doesn't appear to be a way to make - // checkout-index respect sparse checkout rules (-a supercedes it); - // the alternative is using plain checkout, though we have a bunch of - // housekeeping to do to set up, then tear down, the sparse checkout - // controls, as well as restore the original index and HEAD. - _, err = r.r.RunFromDir("git", "checkout-index", "-a", "--prefix="+to) - return err - default: - // TODO(sdboyer) This is a dumb, slow approach, but we're punting on making these - // fast for now because git is the OVERWHELMING case - r.r.UpdateVersion(v.String()) - - cfg := &shutil.CopyTreeOptions{ - Symlinks: true, - CopyFunction: shutil.Copy, - Ignore: func(src string, contents []os.FileInfo) (ignore []string) { - for _, fi := range contents { - if !fi.IsDir() { - continue - } - n := fi.Name() - switch n { - case "vendor", ".bzr", ".svn", ".hg": - ignore = append(ignore, n) - } - } - - return - }, - } - - return shutil.CopyTree(r.rpath, to, cfg) - } -} - -// This func copied from Masterminds/vcs so we can exec our own commands -func mergeEnvLists(in, out []string) []string { -NextVar: - for _, inkv := range in { - k := strings.SplitAfterN(inkv, "=", 2)[0] - for i, outkv := range out { - if strings.HasPrefix(outkv, k) { - out[i] = inkv - continue NextVar - } - } - out = append(out, inkv) - } - return out -} - -func stripVendor(path string, info os.FileInfo, err error) error { - if info.Name() == "vendor" { - if _, err := os.Lstat(path); err == nil { - if info.IsDir() { - return removeAll(path) - } - } - } - - return nil -} diff --git a/remote.go b/remote.go deleted file mode 100644 index c808d9a..0000000 --- a/remote.go +++ /dev/null @@ -1,306 +0,0 @@ -package gps - -import ( - "fmt" - "io" - "net/http" - "net/url" - "regexp" - "strings" -) - -// A remoteRepo represents a potential remote repository resource. 
-// -// RemoteRepos are based purely on lexical analysis; successfully constructing -// one is not a guarantee that the resource it identifies actually exists or is -// accessible. -type remoteRepo struct { - Base string - RelPkg string - CloneURL *url.URL - Schemes []string - VCS []string -} - -//type remoteResult struct { -//r remoteRepo -//err error -//} - -// TODO(sdboyer) sync access to this map -//var remoteCache = make(map[string]remoteResult) - -// Regexes for the different known import path flavors -var ( - // This regex allowed some usernames that github currently disallows. They - // may have allowed them in the past; keeping it in case we need to revert. - //ghRegex = regexp.MustCompile(`^(?Pgithub\.com/([A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`) - ghRegex = regexp.MustCompile(`^(?Pgithub\.com/([A-Za-z0-9][-A-Za-z0-9]*[A-Za-z0-9]/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) - gpinNewRegex = regexp.MustCompile(`^(?Pgopkg\.in/(?:([a-zA-Z0-9][-a-zA-Z0-9]+)/)?([a-zA-Z][-.a-zA-Z0-9]*)\.((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)(?:\.git)?)((?:/[a-zA-Z0-9][-.a-zA-Z0-9]*)*)$`) - //gpinOldRegex = regexp.MustCompile(`^(?Pgopkg\.in/(?:([a-z0-9][-a-z0-9]+)/)?((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)/([a-zA-Z][-a-zA-Z0-9]*)(?:\.git)?)((?:/[a-zA-Z][-a-zA-Z0-9]*)*)$`) - bbRegex = regexp.MustCompile(`^(?Pbitbucket\.org/(?P[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) - //lpRegex = regexp.MustCompile(`^(?Plaunchpad\.net/([A-Za-z0-9-._]+)(/[A-Za-z0-9-._]+)?)(/.+)?`) - lpRegex = regexp.MustCompile(`^(?Plaunchpad\.net/([A-Za-z0-9-._]+))((?:/[A-Za-z0-9_.\-]+)*)?`) - //glpRegex = regexp.MustCompile(`^(?Pgit\.launchpad\.net/([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+)$`) - glpRegex = regexp.MustCompile(`^(?Pgit\.launchpad\.net/([A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) - //gcRegex = 
regexp.MustCompile(`^(?Pcode\.google\.com/[pr]/(?P[a-z0-9\-]+)(\.(?P[a-z0-9\-]+))?)(/[A-Za-z0-9_.\-]+)*$`) - jazzRegex = regexp.MustCompile(`^(?Phub\.jazz\.net/(git/[a-z0-9]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) - apacheRegex = regexp.MustCompile(`^(?Pgit\.apache\.org/([a-z0-9_.\-]+\.git))((?:/[A-Za-z0-9_.\-]+)*)$`) - genericRegex = regexp.MustCompile(`^(?P(?P([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?/[A-Za-z0-9_.\-/~]*?)\.(?Pbzr|git|hg|svn))((?:/[A-Za-z0-9_.\-]+)*)$`) -) - -// Other helper regexes -var ( - scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) - pathvld = regexp.MustCompile(`^([A-Za-z0-9-]+)(\.[A-Za-z0-9-]+)+(/[A-Za-z0-9-_.~]+)*$`) -) - -// deduceRemoteRepo takes a potential import path and returns a RemoteRepo -// representing the remote location of the source of an import path. Remote -// repositories can be bare import paths, or urls including a checkout scheme. -func deduceRemoteRepo(path string) (rr *remoteRepo, err error) { - rr = &remoteRepo{} - if m := scpSyntaxRe.FindStringSubmatch(path); m != nil { - // Match SCP-like syntax and convert it to a URL. - // Eg, "git@github.com:user/repo" becomes - // "ssh://git@github.com/user/repo". 
- rr.CloneURL = &url.URL{ - Scheme: "ssh", - User: url.User(m[1]), - Host: m[2], - Path: "/" + m[3], - // TODO(sdboyer) This is what stdlib sets; grok why better - //RawPath: m[3], - } - } else { - rr.CloneURL, err = url.Parse(path) - if err != nil { - return nil, fmt.Errorf("%q is not a valid import path", path) - } - } - - if rr.CloneURL.Host != "" { - path = rr.CloneURL.Host + "/" + strings.TrimPrefix(rr.CloneURL.Path, "/") - } else { - path = rr.CloneURL.Path - } - - if !pathvld.MatchString(path) { - return nil, fmt.Errorf("%q is not a valid import path", path) - } - - if rr.CloneURL.Scheme != "" { - rr.Schemes = []string{rr.CloneURL.Scheme} - } - - // TODO(sdboyer) instead of a switch, encode base domain in radix tree and pick - // detector from there; if failure, then fall back on metadata work - - switch { - case ghRegex.MatchString(path): - v := ghRegex.FindStringSubmatch(path) - - rr.CloneURL.Host = "github.com" - rr.CloneURL.Path = v[2] - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[3], "/") - rr.VCS = []string{"git"} - - return - - case gpinNewRegex.MatchString(path): - v := gpinNewRegex.FindStringSubmatch(path) - // Duplicate some logic from the gopkg.in server in order to validate - // the import path string without having to hit the server - if strings.Contains(v[4], ".") { - return nil, fmt.Errorf("%q is not a valid import path; gopkg.in only allows major versions (%q instead of %q)", - path, v[4][:strings.Index(v[4], ".")], v[4]) - } - - // gopkg.in is always backed by github - rr.CloneURL.Host = "github.com" - // If the third position is empty, it's the shortened form that expands - // to the go-pkg github user - if v[2] == "" { - rr.CloneURL.Path = "go-pkg/" + v[3] - } else { - rr.CloneURL.Path = v[2] + "/" + v[3] - } - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[6], "/") - rr.VCS = []string{"git"} - - return - //case gpinOldRegex.MatchString(path): - - case bbRegex.MatchString(path): - v := bbRegex.FindStringSubmatch(path) - - 
rr.CloneURL.Host = "bitbucket.org" - rr.CloneURL.Path = v[2] - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[3], "/") - rr.VCS = []string{"git", "hg"} - - return - - //case gcRegex.MatchString(path): - //v := gcRegex.FindStringSubmatch(path) - - //rr.CloneURL.Host = "code.google.com" - //rr.CloneURL.Path = "p/" + v[2] - //rr.Base = v[1] - //rr.RelPkg = strings.TrimPrefix(v[5], "/") - //rr.VCS = []string{"hg", "git"} - - //return - - case lpRegex.MatchString(path): - // TODO(sdboyer) lp handling is nasty - there's ambiguities which can only really - // be resolved with a metadata request. See https://github.com/golang/go/issues/11436 - v := lpRegex.FindStringSubmatch(path) - - rr.CloneURL.Host = "launchpad.net" - rr.CloneURL.Path = v[2] - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[3], "/") - rr.VCS = []string{"bzr"} - - return - - case glpRegex.MatchString(path): - // TODO(sdboyer) same ambiguity issues as with normal bzr lp - v := glpRegex.FindStringSubmatch(path) - - rr.CloneURL.Host = "git.launchpad.net" - rr.CloneURL.Path = v[2] - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[3], "/") - rr.VCS = []string{"git"} - - return - - case jazzRegex.MatchString(path): - v := jazzRegex.FindStringSubmatch(path) - - rr.CloneURL.Host = "hub.jazz.net" - rr.CloneURL.Path = v[2] - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[3], "/") - rr.VCS = []string{"git"} - - return - - case apacheRegex.MatchString(path): - v := apacheRegex.FindStringSubmatch(path) - - rr.CloneURL.Host = "git.apache.org" - rr.CloneURL.Path = v[2] - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[3], "/") - rr.VCS = []string{"git"} - - return - - // try the general syntax - case genericRegex.MatchString(path): - v := genericRegex.FindStringSubmatch(path) - switch v[5] { - case "git", "hg", "bzr": - x := strings.SplitN(v[1], "/", 2) - // TODO(sdboyer) is this actually correct for bzr? 
- rr.CloneURL.Host = x[0] - rr.CloneURL.Path = x[1] - rr.VCS = []string{v[5]} - rr.Base = v[1] - rr.RelPkg = strings.TrimPrefix(v[6], "/") - return - default: - return nil, fmt.Errorf("unknown repository type: %q", v[5]) - } - } - - // No luck so far. maybe it's one of them vanity imports? - importroot, vcs, reporoot, err := parseMetadata(path) - if err != nil { - return nil, fmt.Errorf("unable to deduce repository and source type for: %q", path) - } - - // If we got something back at all, then it supercedes the actual input for - // the real URL to hit - rr.CloneURL, err = url.Parse(reporoot) - if err != nil { - return nil, fmt.Errorf("server returned bad URL when searching for vanity import: %q", reporoot) - } - - // We have a real URL. Set the other values and return. - rr.Base = importroot - rr.RelPkg = strings.TrimPrefix(path[len(importroot):], "/") - - rr.VCS = []string{vcs} - if rr.CloneURL.Scheme != "" { - rr.Schemes = []string{rr.CloneURL.Scheme} - } - - return rr, nil -} - -// fetchMetadata fetchs the remote metadata for path. -func fetchMetadata(path string) (rc io.ReadCloser, err error) { - defer func() { - if err != nil { - err = fmt.Errorf("unable to determine remote metadata protocol: %s", err) - } - }() - - // try https first - rc, err = doFetchMetadata("https", path) - if err == nil { - return - } - - rc, err = doFetchMetadata("http", path) - return -} - -func doFetchMetadata(scheme, path string) (io.ReadCloser, error) { - url := fmt.Sprintf("%s://%s?go-get=1", scheme, path) - switch scheme { - case "https", "http": - resp, err := http.Get(url) - if err != nil { - return nil, fmt.Errorf("failed to access url %q", url) - } - return resp.Body, nil - default: - return nil, fmt.Errorf("unknown remote protocol scheme: %q", scheme) - } -} - -// parseMetadata fetches and decodes remote metadata for path. 
-func parseMetadata(path string) (string, string, string, error) { - rc, err := fetchMetadata(path) - if err != nil { - return "", "", "", err - } - defer rc.Close() - - imports, err := parseMetaGoImports(rc) - if err != nil { - return "", "", "", err - } - match := -1 - for i, im := range imports { - if !strings.HasPrefix(path, im.Prefix) { - continue - } - if match != -1 { - return "", "", "", fmt.Errorf("multiple meta tags match import path %q", path) - } - match = i - } - if match == -1 { - return "", "", "", fmt.Errorf("go-import metadata not found") - } - return imports[match].Prefix, imports[match].VCS, imports[match].RepoRoot, nil -} diff --git a/remote_test.go b/remote_test.go deleted file mode 100644 index 17de00f..0000000 --- a/remote_test.go +++ /dev/null @@ -1,478 +0,0 @@ -package gps - -import ( - "fmt" - "net/url" - "reflect" - "testing" -) - -func TestDeduceRemotes(t *testing.T) { - if testing.Short() { - t.Skip("Skipping remote deduction test in short mode") - } - - fixtures := []struct { - path string - want *remoteRepo - }{ - { - "github.com/sdboyer/gps", - &remoteRepo{ - Base: "github.com/sdboyer/gps", - RelPkg: "", - CloneURL: &url.URL{ - Host: "github.com", - Path: "sdboyer/gps", - }, - Schemes: nil, - VCS: []string{"git"}, - }, - }, - { - "github.com/sdboyer/gps/foo", - &remoteRepo{ - Base: "github.com/sdboyer/gps", - RelPkg: "foo", - CloneURL: &url.URL{ - Host: "github.com", - Path: "sdboyer/gps", - }, - Schemes: nil, - VCS: []string{"git"}, - }, - }, - { - "git@github.com:sdboyer/gps", - &remoteRepo{ - Base: "github.com/sdboyer/gps", - RelPkg: "", - CloneURL: &url.URL{ - Scheme: "ssh", - User: url.User("git"), - Host: "github.com", - Path: "sdboyer/gps", - }, - Schemes: []string{"ssh"}, - VCS: []string{"git"}, - }, - }, - { - "https://github.com/sdboyer/gps/foo", - &remoteRepo{ - Base: "github.com/sdboyer/gps", - RelPkg: "foo", - CloneURL: &url.URL{ - Scheme: "https", - Host: "github.com", - Path: "sdboyer/gps", - }, - Schemes: 
[]string{"https"}, - VCS: []string{"git"}, - }, - }, - { - "https://github.com/sdboyer/gps/foo/bar", - &remoteRepo{ - Base: "github.com/sdboyer/gps", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Scheme: "https", - Host: "github.com", - Path: "sdboyer/gps", - }, - Schemes: []string{"https"}, - VCS: []string{"git"}, - }, - }, - // some invalid github username patterns - { - "github.com/-sdboyer/gps/foo", - nil, - }, - { - "github.com/sdboyer-/gps/foo", - nil, - }, - { - "github.com/sdbo.yer/gps/foo", - nil, - }, - { - "github.com/sdbo_yer/gps/foo", - nil, - }, - { - "gopkg.in/sdboyer/gps.v0", - &remoteRepo{ - Base: "gopkg.in/sdboyer/gps.v0", - RelPkg: "", - CloneURL: &url.URL{ - Host: "github.com", - Path: "sdboyer/gps", - }, - VCS: []string{"git"}, - }, - }, - { - "gopkg.in/sdboyer/gps.v0/foo", - &remoteRepo{ - Base: "gopkg.in/sdboyer/gps.v0", - RelPkg: "foo", - CloneURL: &url.URL{ - Host: "github.com", - Path: "sdboyer/gps", - }, - VCS: []string{"git"}, - }, - }, - { - "gopkg.in/sdboyer/gps.v0/foo/bar", - &remoteRepo{ - Base: "gopkg.in/sdboyer/gps.v0", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Host: "github.com", - Path: "sdboyer/gps", - }, - VCS: []string{"git"}, - }, - }, - { - "gopkg.in/yaml.v1", - &remoteRepo{ - Base: "gopkg.in/yaml.v1", - RelPkg: "", - CloneURL: &url.URL{ - Host: "github.com", - Path: "go-pkg/yaml", - }, - VCS: []string{"git"}, - }, - }, - { - "gopkg.in/yaml.v1/foo/bar", - &remoteRepo{ - Base: "gopkg.in/yaml.v1", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Host: "github.com", - Path: "go-pkg/yaml", - }, - VCS: []string{"git"}, - }, - }, - { - // gopkg.in only allows specifying major version in import path - "gopkg.in/yaml.v1.2", - nil, - }, - // IBM hub devops services - fixtures borrowed from go get - { - "hub.jazz.net/git/user1/pkgname", - &remoteRepo{ - Base: "hub.jazz.net/git/user1/pkgname", - RelPkg: "", - CloneURL: &url.URL{ - Host: "hub.jazz.net", - Path: "git/user1/pkgname", - }, - VCS: []string{"git"}, - }, - }, - { - 
"hub.jazz.net/git/user1/pkgname/submodule/submodule/submodule", - &remoteRepo{ - Base: "hub.jazz.net/git/user1/pkgname", - RelPkg: "submodule/submodule/submodule", - CloneURL: &url.URL{ - Host: "hub.jazz.net", - Path: "git/user1/pkgname", - }, - VCS: []string{"git"}, - }, - }, - { - "hub.jazz.net", - nil, - }, - { - "hub2.jazz.net", - nil, - }, - { - "hub.jazz.net/someotherprefix", - nil, - }, - { - "hub.jazz.net/someotherprefix/user1/pkgname", - nil, - }, - // Spaces are not valid in user names or package names - { - "hub.jazz.net/git/User 1/pkgname", - nil, - }, - { - "hub.jazz.net/git/user1/pkg name", - nil, - }, - // Dots are not valid in user names - { - "hub.jazz.net/git/user.1/pkgname", - nil, - }, - { - "hub.jazz.net/git/user/pkg.name", - &remoteRepo{ - Base: "hub.jazz.net/git/user/pkg.name", - RelPkg: "", - CloneURL: &url.URL{ - Host: "hub.jazz.net", - Path: "git/user/pkg.name", - }, - VCS: []string{"git"}, - }, - }, - // User names cannot have uppercase letters - { - "hub.jazz.net/git/USER/pkgname", - nil, - }, - { - "bitbucket.org/sdboyer/reporoot", - &remoteRepo{ - Base: "bitbucket.org/sdboyer/reporoot", - RelPkg: "", - CloneURL: &url.URL{ - Host: "bitbucket.org", - Path: "sdboyer/reporoot", - }, - VCS: []string{"git", "hg"}, - }, - }, - { - "bitbucket.org/sdboyer/reporoot/foo/bar", - &remoteRepo{ - Base: "bitbucket.org/sdboyer/reporoot", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Host: "bitbucket.org", - Path: "sdboyer/reporoot", - }, - VCS: []string{"git", "hg"}, - }, - }, - { - "https://bitbucket.org/sdboyer/reporoot/foo/bar", - &remoteRepo{ - Base: "bitbucket.org/sdboyer/reporoot", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Scheme: "https", - Host: "bitbucket.org", - Path: "sdboyer/reporoot", - }, - Schemes: []string{"https"}, - VCS: []string{"git", "hg"}, - }, - }, - { - "launchpad.net/govcstestbzrrepo", - &remoteRepo{ - Base: "launchpad.net/govcstestbzrrepo", - RelPkg: "", - CloneURL: &url.URL{ - Host: "launchpad.net", - Path: 
"govcstestbzrrepo", - }, - VCS: []string{"bzr"}, - }, - }, - { - "launchpad.net/govcstestbzrrepo/foo/bar", - &remoteRepo{ - Base: "launchpad.net/govcstestbzrrepo", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Host: "launchpad.net", - Path: "govcstestbzrrepo", - }, - VCS: []string{"bzr"}, - }, - }, - { - "launchpad.net/repo root", - nil, - }, - { - "git.launchpad.net/reporoot", - &remoteRepo{ - Base: "git.launchpad.net/reporoot", - RelPkg: "", - CloneURL: &url.URL{ - Host: "git.launchpad.net", - Path: "reporoot", - }, - VCS: []string{"git"}, - }, - }, - { - "git.launchpad.net/reporoot/foo/bar", - &remoteRepo{ - Base: "git.launchpad.net/reporoot", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Host: "git.launchpad.net", - Path: "reporoot", - }, - VCS: []string{"git"}, - }, - }, - { - "git.launchpad.net/reporoot", - &remoteRepo{ - Base: "git.launchpad.net/reporoot", - RelPkg: "", - CloneURL: &url.URL{ - Host: "git.launchpad.net", - Path: "reporoot", - }, - VCS: []string{"git"}, - }, - }, - { - "git.launchpad.net/repo root", - nil, - }, - { - "git.apache.org/package-name.git", - &remoteRepo{ - Base: "git.apache.org/package-name.git", - RelPkg: "", - CloneURL: &url.URL{ - Host: "git.apache.org", - Path: "package-name.git", - }, - VCS: []string{"git"}, - }, - }, - { - "git.apache.org/package-name.git/foo/bar", - &remoteRepo{ - Base: "git.apache.org/package-name.git", - RelPkg: "foo/bar", - CloneURL: &url.URL{ - Host: "git.apache.org", - Path: "package-name.git", - }, - VCS: []string{"git"}, - }, - }, - // Vanity imports - { - "golang.org/x/exp", - &remoteRepo{ - Base: "golang.org/x/exp", - RelPkg: "", - CloneURL: &url.URL{ - Scheme: "https", - Host: "go.googlesource.com", - Path: "/exp", - }, - Schemes: []string{"https"}, - VCS: []string{"git"}, - }, - }, - { - "golang.org/x/exp/inotify", - &remoteRepo{ - Base: "golang.org/x/exp", - RelPkg: "inotify", - CloneURL: &url.URL{ - Scheme: "https", - Host: "go.googlesource.com", - Path: "/exp", - }, - Schemes: []string{"https"}, 
- VCS: []string{"git"}, - }, - }, - { - "rsc.io/pdf", - &remoteRepo{ - Base: "rsc.io/pdf", - RelPkg: "", - CloneURL: &url.URL{ - Scheme: "https", - Host: "github.com", - Path: "/rsc/pdf", - }, - Schemes: []string{"https"}, - VCS: []string{"git"}, - }, - }, - // Regression - gh does allow two-letter usernames - { - "github.com/kr/pretty", - &remoteRepo{ - Base: "github.com/kr/pretty", - RelPkg: "", - CloneURL: &url.URL{ - Host: "github.com", - Path: "kr/pretty", - }, - Schemes: nil, - VCS: []string{"git"}, - }, - }, - } - - for _, fix := range fixtures { - got, err := deduceRemoteRepo(fix.path) - want := fix.want - - if want == nil { - if err == nil { - t.Errorf("deduceRemoteRepo(%q): Error expected but not received", fix.path) - } - continue - } - - if err != nil { - t.Errorf("deduceRemoteRepo(%q): %v", fix.path, err) - continue - } - - if got.Base != want.Base { - t.Errorf("deduceRemoteRepo(%q): Base was %s, wanted %s", fix.path, got.Base, want.Base) - } - if got.RelPkg != want.RelPkg { - t.Errorf("deduceRemoteRepo(%q): RelPkg was %s, wanted %s", fix.path, got.RelPkg, want.RelPkg) - } - if !reflect.DeepEqual(got.CloneURL, want.CloneURL) { - // misspelling things is cool when it makes columns line up - t.Errorf("deduceRemoteRepo(%q): CloneURL disagreement:\n(GOT) %s\n(WNT) %s", fix.path, ufmt(got.CloneURL), ufmt(want.CloneURL)) - } - if !reflect.DeepEqual(got.VCS, want.VCS) { - t.Errorf("deduceRemoteRepo(%q): VCS was %s, wanted %s", fix.path, got.VCS, want.VCS) - } - if !reflect.DeepEqual(got.Schemes, want.Schemes) { - t.Errorf("deduceRemoteRepo(%q): Schemes was %s, wanted %s", fix.path, got.Schemes, want.Schemes) - } - } -} - -// borrow from stdlib -// more useful string for debugging than fmt's struct printer -func ufmt(u *url.URL) string { - var user, pass interface{} - if u.User != nil { - user = u.User.Username() - if p, ok := u.User.Password(); ok { - pass = p - } - } - return fmt.Sprintf("host=%q, path=%q, opaque=%q, scheme=%q, user=%#v, pass=%#v, 
rawpath=%q, rawq=%q, frag=%q", - u.Host, u.Path, u.Opaque, u.Scheme, user, pass, u.RawPath, u.RawQuery, u.Fragment) -} diff --git a/result.go b/result.go index e601de9..7b13f23 100644 --- a/result.go +++ b/result.go @@ -46,7 +46,7 @@ func CreateVendorTree(basedir string, l Lock, sm SourceManager, sv bool) error { return err } - err = sm.ExportProject(p.Ident().ProjectRoot, p.Version(), to) + err = sm.ExportProject(p.Ident(), p.Version(), to) if err != nil { removeAll(basedir) return fmt.Errorf("Error while exporting %s: %s", p.Ident().ProjectRoot, err) diff --git a/result_test.go b/result_test.go index f1544c6..61c20f3 100644 --- a/result_test.go +++ b/result_test.go @@ -48,12 +48,10 @@ func TestResultCreateVendorTree(t *testing.T) { tmp := path.Join(os.TempDir(), "vsolvtest") os.RemoveAll(tmp) - sm, err := NewSourceManager(naiveAnalyzer{}, path.Join(tmp, "cache"), false) - if err != nil { - t.Errorf("NewSourceManager errored unexpectedly: %q", err) - } + sm, clean := mkNaiveSM(t) + defer clean() - err = CreateVendorTree(path.Join(tmp, "export"), r, sm, true) + err := CreateVendorTree(path.Join(tmp, "export"), r, sm, true) if err != nil { t.Errorf("Unexpected error while creating vendor tree: %s", err) } @@ -77,7 +75,7 @@ func BenchmarkCreateVendorTree(b *testing.B) { // Prefetch the projects before timer starts for _, lp := range r.p { - _, _, err := sm.GetManifestAndLock(lp.Ident().ProjectRoot, lp.Version()) + _, _, err := sm.GetManifestAndLock(lp.Ident(), lp.Version()) if err != nil { b.Errorf("failed getting project info during prefetch: %s", err) clean = false diff --git a/satisfy.go b/satisfy.go index 686676d..ef9e688 100644 --- a/satisfy.go +++ b/satisfy.go @@ -99,7 +99,7 @@ func (s *solver) checkAtomAllowable(pa atom) error { // checkRequiredPackagesExist ensures that all required packages enumerated by // existing dependencies on this atom are actually present in the atom. 
func (s *solver) checkRequiredPackagesExist(a atomWithPackages) error { - ptree, err := s.b.listPackages(a.a.id, a.a.v) + ptree, err := s.b.ListPackages(a.a.id, a.a.v) if err != nil { // TODO(sdboyer) handle this more gracefully return err @@ -225,7 +225,7 @@ func (s *solver) checkPackageImportsFromDepExist(a atomWithPackages, cdep comple return nil } - ptree, err := s.b.listPackages(sel.a.id, sel.a.v) + ptree, err := s.b.ListPackages(sel.a.id, sel.a.v) if err != nil { // TODO(sdboyer) handle this more gracefully return err @@ -266,7 +266,7 @@ func (s *solver) checkRevisionExists(a atomWithPackages, cdep completeDep) error return nil } - present, _ := s.b.revisionPresentIn(cdep.Ident, r) + present, _ := s.b.RevisionPresentIn(cdep.Ident, r) if present { return nil } diff --git a/solve_basic_test.go b/solve_basic_test.go index ac833e3..6b6a092 100644 --- a/solve_basic_test.go +++ b/solve_basic_test.go @@ -1202,41 +1202,43 @@ func newdepspecSM(ds []depspec, ignore []string) *depspecSourceManager { } } -func (sm *depspecSourceManager) GetManifestAndLock(n ProjectRoot, v Version) (Manifest, Lock, error) { +func (sm *depspecSourceManager) GetManifestAndLock(id ProjectIdentifier, v Version) (Manifest, Lock, error) { for _, ds := range sm.specs { - if n == ds.n && v.Matches(ds.v) { + if id.ProjectRoot == ds.n && v.Matches(ds.v) { return ds, dummyLock{}, nil } } // TODO(sdboyer) proper solver-type errors - return nil, nil, fmt.Errorf("Project %s at version %s could not be found", n, v) + return nil, nil, fmt.Errorf("Project %s at version %s could not be found", id.errString(), v) } func (sm *depspecSourceManager) AnalyzerInfo() (string, *semver.Version) { return "depspec-sm-builtin", sv("v1.0.0") } -func (sm *depspecSourceManager) ExternalReach(n ProjectRoot, v Version) (map[string][]string, error) { - id := pident{n: n, v: v} - if m, exists := sm.rm[id]; exists { +func (sm *depspecSourceManager) ExternalReach(id ProjectIdentifier, v Version) (map[string][]string, error) { 
+ pid := pident{n: id.ProjectRoot, v: v} + if m, exists := sm.rm[pid]; exists { return m, nil } - return nil, fmt.Errorf("No reach data for %s at version %s", n, v) + return nil, fmt.Errorf("No reach data for %s at version %s", id.errString(), v) } -func (sm *depspecSourceManager) ListExternal(n ProjectRoot, v Version) ([]string, error) { +func (sm *depspecSourceManager) ListExternal(id ProjectIdentifier, v Version) ([]string, error) { // This should only be called for the root - id := pident{n: n, v: v} - if r, exists := sm.rm[id]; exists { - return r[string(n)], nil + pid := pident{n: id.ProjectRoot, v: v} + if r, exists := sm.rm[pid]; exists { + return r[string(id.ProjectRoot)], nil } - return nil, fmt.Errorf("No reach data for %s at version %s", n, v) + return nil, fmt.Errorf("No reach data for %s at version %s", id.errString(), v) } -func (sm *depspecSourceManager) ListPackages(n ProjectRoot, v Version) (PackageTree, error) { - id := pident{n: n, v: v} - if r, exists := sm.rm[id]; exists { +func (sm *depspecSourceManager) ListPackages(id ProjectIdentifier, v Version) (PackageTree, error) { + pid := pident{n: id.ProjectRoot, v: v} + n := id.ProjectRoot + + if r, exists := sm.rm[pid]; exists { ptree := PackageTree{ ImportRoot: string(n), Packages: map[string]PackageOrErr{ @@ -1255,35 +1257,35 @@ func (sm *depspecSourceManager) ListPackages(n ProjectRoot, v Version) (PackageT return PackageTree{}, fmt.Errorf("Project %s at version %s could not be found", n, v) } -func (sm *depspecSourceManager) ListVersions(name ProjectRoot) (pi []Version, err error) { +func (sm *depspecSourceManager) ListVersions(id ProjectIdentifier) (pi []Version, err error) { for _, ds := range sm.specs { // To simulate the behavior of the real SourceManager, we do not return // revisions from ListVersions(). 
- if _, isrev := ds.v.(Revision); !isrev && name == ds.n { + if _, isrev := ds.v.(Revision); !isrev && id.ProjectRoot == ds.n { pi = append(pi, ds.v) } } if len(pi) == 0 { - err = fmt.Errorf("Project %s could not be found", name) + err = fmt.Errorf("Project %s could not be found", id.errString()) } return } -func (sm *depspecSourceManager) RevisionPresentIn(name ProjectRoot, r Revision) (bool, error) { +func (sm *depspecSourceManager) RevisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) { for _, ds := range sm.specs { - if name == ds.n && r == ds.v { + if id.ProjectRoot == ds.n && r == ds.v { return true, nil } } - return false, fmt.Errorf("Project %s has no revision %s", name, r) + return false, fmt.Errorf("Project %s has no revision %s", id.errString(), r) } -func (sm *depspecSourceManager) RepoExists(name ProjectRoot) (bool, error) { +func (sm *depspecSourceManager) SourceExists(id ProjectIdentifier) (bool, error) { for _, ds := range sm.specs { - if name == ds.n { + if id.ProjectRoot == ds.n { return true, nil } } @@ -1291,16 +1293,26 @@ func (sm *depspecSourceManager) RepoExists(name ProjectRoot) (bool, error) { return false, nil } -func (sm *depspecSourceManager) VendorCodeExists(name ProjectRoot) (bool, error) { +func (sm *depspecSourceManager) VendorCodeExists(id ProjectIdentifier) (bool, error) { return false, nil } func (sm *depspecSourceManager) Release() {} -func (sm *depspecSourceManager) ExportProject(n ProjectRoot, v Version, to string) error { +func (sm *depspecSourceManager) ExportProject(id ProjectIdentifier, v Version, to string) error { return fmt.Errorf("dummy sm doesn't support exporting") } +func (sm *depspecSourceManager) DeduceProjectRoot(ip string) (ProjectRoot, error) { + for _, ds := range sm.allSpecs() { + n := string(ds.n) + if ip == n || strings.HasPrefix(ip, n+"/") { + return ProjectRoot(n), nil + } + } + return "", fmt.Errorf("Could not find %s, or any parent, in list of known fixtures", ip) +} + func (sm 
*depspecSourceManager) rootSpec() depspec { return sm.specs[0] } @@ -1324,7 +1336,7 @@ func (b *depspecBridge) computeRootReach() ([]string, error) { dsm := b.sm.(fixSM) root := dsm.rootSpec() - ptree, err := dsm.ListPackages(root.n, nil) + ptree, err := dsm.ListPackages(mkPI(string(root.n)), nil) if err != nil { return nil, err } @@ -1342,23 +1354,8 @@ func (b *depspecBridge) verifyRootDir(path string) error { return nil } -func (b *depspecBridge) listPackages(id ProjectIdentifier, v Version) (PackageTree, error) { - return b.sm.(fixSM).ListPackages(b.key(id), v) -} - -// override deduceRemoteRepo on bridge to make all our pkg/project mappings work -// as expected -func (b *depspecBridge) deduceRemoteRepo(path string) (*remoteRepo, error) { - for _, ds := range b.sm.(fixSM).allSpecs() { - n := string(ds.n) - if path == n || strings.HasPrefix(path, n+"/") { - return &remoteRepo{ - Base: n, - RelPkg: strings.TrimPrefix(path, n+"/"), - }, nil - } - } - return nil, fmt.Errorf("Could not find %s, or any parent, in list of known fixtures", path) +func (b *depspecBridge) ListPackages(id ProjectIdentifier, v Version) (PackageTree, error) { + return b.sm.(fixSM).ListPackages(id, v) } // enforce interfaces diff --git a/solve_bimodal_test.go b/solve_bimodal_test.go index 530d6e1..f62619d 100644 --- a/solve_bimodal_test.go +++ b/solve_bimodal_test.go @@ -649,12 +649,12 @@ func newbmSM(bmf bimodalFixture) *bmSourceManager { return sm } -func (sm *bmSourceManager) ListPackages(n ProjectRoot, v Version) (PackageTree, error) { +func (sm *bmSourceManager) ListPackages(id ProjectIdentifier, v Version) (PackageTree, error) { for k, ds := range sm.specs { // Cheat for root, otherwise we blow up b/c version is empty - if n == ds.n && (k == 0 || ds.v.Matches(v)) { + if id.ProjectRoot == ds.n && (k == 0 || ds.v.Matches(v)) { ptree := PackageTree{ - ImportRoot: string(n), + ImportRoot: string(id.ProjectRoot), Packages: make(map[string]PackageOrErr), } for _, pkg := range ds.pkgs { @@ 
-671,13 +671,13 @@ func (sm *bmSourceManager) ListPackages(n ProjectRoot, v Version) (PackageTree, } } - return PackageTree{}, fmt.Errorf("Project %s at version %s could not be found", n, v) + return PackageTree{}, fmt.Errorf("Project %s at version %s could not be found", id.errString(), v) } -func (sm *bmSourceManager) GetManifestAndLock(n ProjectRoot, v Version) (Manifest, Lock, error) { +func (sm *bmSourceManager) GetManifestAndLock(id ProjectIdentifier, v Version) (Manifest, Lock, error) { for _, ds := range sm.specs { - if n == ds.n && v.Matches(ds.v) { - if l, exists := sm.lm[string(n)+" "+v.String()]; exists { + if id.ProjectRoot == ds.n && v.Matches(ds.v) { + if l, exists := sm.lm[string(id.ProjectRoot)+" "+v.String()]; exists { return ds, l, nil } return ds, dummyLock{}, nil @@ -685,7 +685,7 @@ func (sm *bmSourceManager) GetManifestAndLock(n ProjectRoot, v Version) (Manifes } // TODO(sdboyer) proper solver-type errors - return nil, nil, fmt.Errorf("Project %s at version %s could not be found", n, v) + return nil, nil, fmt.Errorf("Project %s at version %s could not be found", id.errString(), v) } // computeBimodalExternalMap takes a set of depspecs and computes an diff --git a/errors.go b/solve_failures.go similarity index 100% rename from errors.go rename to solve_failures.go diff --git a/solve_test.go b/solve_test.go index 67d0b04..94ed8ba 100644 --- a/solve_test.go +++ b/solve_test.go @@ -30,7 +30,7 @@ func overrideMkBridge() { &bridge{ sm: sm, s: s, - vlists: make(map[ProjectRoot][]Version), + vlists: make(map[ProjectIdentifier][]Version), }, } } @@ -322,7 +322,7 @@ func TestBadSolveOpts(t *testing.T) { return &bridge{ sm: sm, s: s, - vlists: make(map[ProjectRoot][]Version), + vlists: make(map[ProjectIdentifier][]Version), } } diff --git a/solver.go b/solver.go index f6efd96..d82a40c 100644 --- a/solver.go +++ b/solver.go @@ -437,7 +437,7 @@ func (s *solver) selectRoot() error { v: rootRev, } - ptree, err := s.b.listPackages(pa.id, nil) + ptree, err := 
s.b.ListPackages(pa.id, nil) if err != nil { return err } @@ -493,12 +493,12 @@ func (s *solver) getImportsAndConstraintsOf(a atomWithPackages) ([]completeDep, // Work through the source manager to get project info and static analysis // information. - m, _, err := s.b.getManifestAndLock(a.a) + m, _, err := s.b.GetManifestAndLock(a.a.id, a.a.v) if err != nil { return nil, err } - ptree, err := s.b.listPackages(a.a.id, a.a.v) + ptree, err := s.b.ListPackages(a.a.id, a.a.v) if err != nil { return nil, err } @@ -596,7 +596,7 @@ func (s *solver) intersectConstraintsWithImports(deps []workingConstraint, reach } // No match. Let the SourceManager try to figure out the root - root, err := s.b.deduceRemoteRepo(rp) + root, err := s.b.DeduceProjectRoot(rp) if err != nil { // Nothing we can do if we can't suss out a root return nil, err @@ -605,17 +605,17 @@ func (s *solver) intersectConstraintsWithImports(deps []workingConstraint, reach // Make a new completeDep with an open constraint, respecting overrides pd := s.ovr.override(ProjectConstraint{ Ident: ProjectIdentifier{ - ProjectRoot: ProjectRoot(root.Base), - NetworkName: root.Base, + ProjectRoot: root, + NetworkName: string(root), }, Constraint: Any(), }) // Insert the pd into the trie so that further deps from this // project get caught by the prefix search - xt.Insert(root.Base, pd) + xt.Insert(string(root), pd) // And also put the complete dep into the dmap - dmap[ProjectRoot(root.Base)] = completeDep{ + dmap[root] = completeDep{ workingConstraint: pd, pl: []string{rp}, } @@ -639,7 +639,7 @@ func (s *solver) createVersionQueue(bmi bimodalIdentifier) (*versionQueue, error return newVersionQueue(id, nil, nil, s.b) } - exists, err := s.b.repoExists(id) + exists, err := s.b.SourceExists(id) if err != nil { return nil, err } @@ -679,7 +679,7 @@ func (s *solver) createVersionQueue(bmi bimodalIdentifier) (*versionQueue, error continue } - _, l, err := s.b.getManifestAndLock(dep.depender) + _, l, err := 
s.b.GetManifestAndLock(dep.depender.id, dep.depender.v) if err != nil || l == nil { // err being non-nil really shouldn't be possible, but the lock // being nil is quite likely @@ -816,7 +816,7 @@ func (s *solver) getLockVersionIfValid(id ProjectIdentifier) (Version, error) { // to be found and attempted in the repository. If it's only in vendor, // though, then we have to try to use what's in the lock, because that's // the only version we'll be able to get. - if exist, _ := s.b.repoExists(id); exist { + if exist, _ := s.b.SourceExists(id); exist { return nil, nil } @@ -1001,8 +1001,8 @@ func (s *solver) unselectedComparator(i, j int) bool { // We can safely ignore an err from ListVersions here because, if there is // an actual problem, it'll be noted and handled somewhere else saner in the // solving algorithm. - ivl, _ := s.b.listVersions(iname) - jvl, _ := s.b.listVersions(jname) + ivl, _ := s.b.ListVersions(iname) + jvl, _ := s.b.ListVersions(jname) iv, jv := len(ivl), len(jvl) // Packages with fewer versions to pick from are less likely to benefit from @@ -1060,7 +1060,7 @@ func (s *solver) selectAtom(a atomWithPackages, pkgonly bool) { // If this atom has a lock, pull it out so that we can potentially inject // preferred versions into any bmis we enqueue - _, l, _ := s.b.getManifestAndLock(a.a) + _, l, _ := s.b.GetManifestAndLock(a.a.id, a.a.v) var lmap map[ProjectIdentifier]Version if l != nil { lmap = make(map[ProjectIdentifier]Version) diff --git a/source.go b/source.go new file mode 100644 index 0000000..feaba15 --- /dev/null +++ b/source.go @@ -0,0 +1,328 @@ +package gps + +import "fmt" + +type source interface { + checkExistence(sourceExistence) bool + exportVersionTo(Version, string) error + getManifestAndLock(ProjectRoot, Version) (Manifest, Lock, error) + listPackages(ProjectRoot, Version) (PackageTree, error) + listVersions() ([]Version, error) + revisionPresentIn(Revision) (bool, error) +} + +type sourceMetaCache struct { + //Version string // 
TODO(sdboyer) use this + infos map[Revision]projectInfo + ptrees map[Revision]PackageTree + vMap map[UnpairedVersion]Revision + rMap map[Revision][]UnpairedVersion + // TODO(sdboyer) mutexes. actually probably just one, b/c complexity +} + +// projectInfo holds manifest and lock +type projectInfo struct { + Manifest + Lock +} + +type existence struct { + // The existence levels for which a search/check has been performed + s sourceExistence + + // The existence levels verified to be present through searching + f sourceExistence +} + +func newMetaCache() *sourceMetaCache { + return &sourceMetaCache{ + infos: make(map[Revision]projectInfo), + ptrees: make(map[Revision]PackageTree), + vMap: make(map[UnpairedVersion]Revision), + rMap: make(map[Revision][]UnpairedVersion), + } +} + +type baseVCSSource struct { + // Object for the cache repository + crepo *repo + + // Indicates the extent to which we have searched for, and verified, the + // existence of the project/repo. + ex existence + + // ProjectAnalyzer used to fulfill getManifestAndLock + an ProjectAnalyzer + + // Whether the cache has the latest info on versions + cvsync bool + + // The project metadata cache. This is (or is intended to be) persisted to + // disk, for reuse across solver runs. + dc *sourceMetaCache + + // lvfunc allows the other vcs source types that embed this type to inject + // their listVersions func into the baseSource, for use as needed. 
+ lvfunc func() (vlist []Version, err error) +} + +func (bs *baseVCSSource) getManifestAndLock(r ProjectRoot, v Version) (Manifest, Lock, error) { + if err := bs.ensureCacheExistence(); err != nil { + return nil, nil, err + } + + rev, err := bs.toRevOrErr(v) + if err != nil { + return nil, nil, err + } + + // Return the info from the cache, if we already have it + if pi, exists := bs.dc.infos[rev]; exists { + return pi.Manifest, pi.Lock, nil + } + + bs.crepo.mut.Lock() + if !bs.crepo.synced { + err = bs.crepo.r.Update() + if err != nil { + return nil, nil, fmt.Errorf("could not fetch latest updates into repository") + } + bs.crepo.synced = true + } + + // Always prefer a rev, if it's available + if pv, ok := v.(PairedVersion); ok { + err = bs.crepo.r.UpdateVersion(pv.Underlying().String()) + } else { + err = bs.crepo.r.UpdateVersion(v.String()) + } + bs.crepo.mut.Unlock() + + if err != nil { + // TODO(sdboyer) More-er proper-er error + panic(fmt.Sprintf("canary - why is checkout/whatever failing: %s %s %s", bs.crepo.r.LocalPath(), v.String(), err)) + } + + bs.crepo.mut.RLock() + m, l, err := bs.an.DeriveManifestAndLock(bs.crepo.r.LocalPath(), r) + // TODO(sdboyer) cache results + bs.crepo.mut.RUnlock() + + if err == nil { + if l != nil { + l = prepLock(l) + } + + // If m is nil, prepManifest will provide an empty one. + pi := projectInfo{ + Manifest: prepManifest(m), + Lock: l, + } + + bs.dc.infos[rev] = pi + + return pi.Manifest, pi.Lock, nil + } + + return nil, nil, err +} + +// toRevision turns a Version into a Revision, if doing so is possible based on +// the information contained in the version itself, or in the cache maps. +func (dc *sourceMetaCache) toRevision(v Version) Revision { + switch t := v.(type) { + case Revision: + return t + case PairedVersion: + return t.Underlying() + case UnpairedVersion: + // This will return the empty rev (empty string) if we don't have a + // record of it. 
It's up to the caller to decide, for example, if + it's appropriate to update the cache. + return dc.vMap[t] + default: + panic(fmt.Sprintf("Unknown version type %T", v)) + } +} + +// toUnpaired turns a Version into an UnpairedVersion, if doing so is possible +// based on the information contained in the version itself, or in the cache +// maps. +// +// If the input is a revision and multiple UnpairedVersions are associated with +// it, whatever happens to be the first is returned. +func (dc *sourceMetaCache) toUnpaired(v Version) UnpairedVersion { + switch t := v.(type) { + case UnpairedVersion: + return t + case PairedVersion: + return t.Unpair() + case Revision: + if upv, has := dc.rMap[t]; has && len(upv) > 0 { + return upv[0] + } + return nil + default: + panic(fmt.Sprintf("unknown version type %T", v)) + } +} + +func (bs *baseVCSSource) revisionPresentIn(r Revision) (bool, error) { + // First and fastest path is to check the data cache to see if the rev is + // present. This could give us false positives, but the cases where that can + // occur would require a type of cache staleness that seems *exceedingly* + // unlikely to occur. + if _, has := bs.dc.infos[r]; has { + return true, nil + } else if _, has := bs.dc.rMap[r]; has { + return true, nil + } + + err := bs.ensureCacheExistence() + if err != nil { + return false, err + } + + bs.crepo.mut.RLock() + defer bs.crepo.mut.RUnlock() + return bs.crepo.r.IsReference(string(r)), nil +} + +func (bs *baseVCSSource) ensureCacheExistence() error { + // Technically, methods could attempt to return straight from the + // metadata cache even if the repo cache doesn't exist on disk. But that + // would allow weird state inconsistencies (cache exists, but no repo...how + // does that even happen?)
that it'd be better to just not allow so that we + // don't have to think about it elsewhere + if !bs.checkExistence(existsInCache) { + if bs.checkExistence(existsUpstream) { + bs.crepo.mut.Lock() + err := bs.crepo.r.Get() + bs.crepo.mut.Unlock() + + if err != nil { + return fmt.Errorf("failed to create repository cache for %s", bs.crepo.r.Remote()) + } + bs.crepo.synced = true + bs.ex.s |= existsInCache + bs.ex.f |= existsInCache + } else { + return fmt.Errorf("project %s does not exist upstream", bs.crepo.r.Remote()) + } + } + + return nil +} + +// checkExistence provides a direct method for querying existence levels of the +// source. It will only perform actual searching (local fs or over the network) +// if no previous attempt at that search has been made. +// +// Note that this may perform read-ish operations on the cache repo, and it +// takes a lock accordingly. This makes it unsafe to call from a segment where +// the cache repo mutex is already write-locked, as deadlock will occur. 
+func (bs *baseVCSSource) checkExistence(ex sourceExistence) bool { + if bs.ex.s&ex != ex { + if ex&existsInVendorRoot != 0 && bs.ex.s&existsInVendorRoot == 0 { + panic("should now be implemented in bridge") + } + if ex&existsInCache != 0 && bs.ex.s&existsInCache == 0 { + bs.crepo.mut.RLock() + bs.ex.s |= existsInCache + if bs.crepo.r.CheckLocal() { + bs.ex.f |= existsInCache + } + bs.crepo.mut.RUnlock() + } + if ex&existsUpstream != 0 && bs.ex.s&existsUpstream == 0 { + bs.crepo.mut.RLock() + bs.ex.s |= existsUpstream + if bs.crepo.r.Ping() { + bs.ex.f |= existsUpstream + } + bs.crepo.mut.RUnlock() + } + } + + return ex&bs.ex.f == ex +} + +func (bs *baseVCSSource) listPackages(pr ProjectRoot, v Version) (ptree PackageTree, err error) { + if err = bs.ensureCacheExistence(); err != nil { + return + } + + var r Revision + if r, err = bs.toRevOrErr(v); err != nil { + return + } + + // Return the ptree from the cache, if we already have it + var exists bool + if ptree, exists = bs.dc.ptrees[r]; exists { + return + } + + // Not in the cache; check out the version and do the analysis + bs.crepo.mut.Lock() + // Check out the desired version for analysis + if r != "" { + // Always prefer a rev, if it's available + err = bs.crepo.r.UpdateVersion(string(r)) + } else { + // If we don't have a rev, ensure the repo is up to date, otherwise we + // could have a desync issue + if !bs.crepo.synced { + err = bs.crepo.r.Update() + if err != nil { + return PackageTree{}, fmt.Errorf("could not fetch latest updates into repository: %s", err) + } + bs.crepo.synced = true + } + err = bs.crepo.r.UpdateVersion(v.String()) + } + + ptree, err = listPackages(bs.crepo.r.LocalPath(), string(pr)) + bs.crepo.mut.Unlock() + + // TODO(sdboyer) cache errs? + if err != nil { + bs.dc.ptrees[r] = ptree + } + + return +} + +// toRevOrErr makes all efforts to convert a Version into a rev, including +// updating the cache repo (if needed). 
It does not guarantee that the returned +// Revision actually exists in the repository (as one of the cheaper methods may +// have had bad data). +func (bs *baseVCSSource) toRevOrErr(v Version) (r Revision, err error) { + r = bs.dc.toRevision(v) + if r == "" { + // Rev can be empty if: + // - The cache is unsynced + // - A version was passed that used to exist, but no longer does + // - A garbage version was passed. (Functionally indistinguishable from + // the previous) + if !bs.cvsync { + // call the lvfunc to sync the meta cache + _, err = bs.lvfunc() + if err != nil { + return + } + } + + r = bs.dc.toRevision(v) + // If we still don't have a rev, then the version's no good + if r == "" { + err = fmt.Errorf("version %s does not exist in source %s", v, bs.crepo.r.Remote()) + } + } + + return +} + +func (bs *baseVCSSource) exportVersionTo(v Version, to string) error { + return bs.crepo.exportVersionTo(v, to) +} diff --git a/source_manager.go b/source_manager.go index 7403025..11ec567 100644 --- a/source_manager.go +++ b/source_manager.go @@ -1,58 +1,64 @@ package gps import ( - "encoding/json" "fmt" - "go/build" "os" - "path" + "path/filepath" + "strings" + "sync" + "sync/atomic" "github.com/Masterminds/semver" - "github.com/Masterminds/vcs" ) +// Used to compute a friendly filepath from a URL-shaped input +// +// TODO(sdboyer) this is awful. Right? +var sanitizer = strings.NewReplacer(":", "-", "/", "-", "+", "-") + // A SourceManager is responsible for retrieving, managing, and interrogating // source repositories. Its primary purpose is to serve the needs of a Solver, // but it is handy for other purposes, as well. // -// gps's built-in SourceManager, accessible via NewSourceManager(), is -// intended to be generic and sufficient for any purpose. It provides some -// additional semantics around the methods defined here. +// gps's built-in SourceManager, SourceMgr, is intended to be generic and +// sufficient for any purpose. 
It provides some additional semantics around the +// methods defined here. type SourceManager interface { - // RepoExists checks if a repository exists, either upstream or in the + // SourceExists checks if a repository exists, either upstream or in the // SourceManager's central repository cache. - RepoExists(ProjectRoot) (bool, error) + SourceExists(ProjectIdentifier) (bool, error) // ListVersions retrieves a list of the available versions for a given // repository name. - ListVersions(ProjectRoot) ([]Version, error) + ListVersions(ProjectIdentifier) ([]Version, error) // RevisionPresentIn indicates whether the provided Version is present in // the given repository. - RevisionPresentIn(ProjectRoot, Revision) (bool, error) + RevisionPresentIn(ProjectIdentifier, Revision) (bool, error) - // ListPackages retrieves a tree of the Go packages at or below the provided - // import path, at the provided version. - ListPackages(ProjectRoot, Version) (PackageTree, error) + // ListPackages parses the tree of the Go packages at or below root of the + // provided ProjectIdentifier, at the provided version. + ListPackages(ProjectIdentifier, Version) (PackageTree, error) // GetManifestAndLock returns manifest and lock information for the provided // root import path. // - // gps currently requires that projects be rooted at their - // repository root, necessitating that this ProjectRoot must also be a + // gps currently requires that projects be rooted at their repository root, + // necessitating that the ProjectIdentifier's ProjectRoot must also be a // repository root. - GetManifestAndLock(ProjectRoot, Version) (Manifest, Lock, error) + GetManifestAndLock(ProjectIdentifier, Version) (Manifest, Lock, error) + + // ExportProject writes out the tree of the provided import path, at the + // provided version, to the provided directory. 
+ ExportProject(ProjectIdentifier, Version, string) error // AnalyzerInfo reports the name and version of the logic used to service // GetManifestAndLock(). AnalyzerInfo() (name string, version *semver.Version) - // ExportProject writes out the tree of the provided import path, at the - // provided version, to the provided directory. - ExportProject(ProjectRoot, Version, string) error - - // Release lets go of any locks held by the SourceManager. - Release() + // DeduceRootProject takes an import path and deduces the corresponding + // project/source root. + DeduceProjectRoot(ip string) (ProjectRoot, error) } // A ProjectAnalyzer is responsible for analyzing a given path for Manifest and @@ -62,6 +68,7 @@ type ProjectAnalyzer interface { // root import path importRoot, to determine the project's constraints, as // indicated by a Manifest and Lock. DeriveManifestAndLock(path string, importRoot ProjectRoot) (Manifest, Lock, error) + // Report the name and version of this ProjectAnalyzer. Info() (name string, version *semver.Version) } @@ -72,22 +79,15 @@ type ProjectAnalyzer interface { // tools; control via dependency injection is intended to be sufficient. type SourceMgr struct { cachedir string - pms map[ProjectRoot]*pmState + srcs map[string]source + srcmut sync.RWMutex an ProjectAnalyzer - ctx build.Context - //pme map[ProjectRoot]error + dxt deducerTrie + rootxt prTrie } var _ SourceManager = &SourceMgr{} -// Holds a projectManager, caches of the managed project's data, and information -// about the freshness of those caches -type pmState struct { - pm *projectManager - cf *os.File // handle for the cache file - vcur bool // indicates that we've called ListVersions() -} - // NewSourceManager produces an instance of gps's built-in SourceManager. 
It // takes a cache directory (where local instances of upstream repositories are // stored), a vendor directory for the project currently being worked on, and a @@ -110,12 +110,12 @@ func NewSourceManager(an ProjectAnalyzer, cachedir string, force bool) (*SourceM return nil, fmt.Errorf("a ProjectAnalyzer must be provided to the SourceManager") } - err := os.MkdirAll(cachedir, 0777) + err := os.MkdirAll(filepath.Join(cachedir, "sources"), 0777) if err != nil { return nil, err } - glpath := path.Join(cachedir, "sm.lock") + glpath := filepath.Join(cachedir, "sm.lock") _, err = os.Stat(glpath) if err == nil && !force { return nil, fmt.Errorf("cache lock file %s exists - another process crashed or is still running?", glpath) @@ -126,21 +126,18 @@ func NewSourceManager(an ProjectAnalyzer, cachedir string, force bool) (*SourceM return nil, fmt.Errorf("failed to create global cache lock file at %s with err %s", glpath, err) } - ctx := build.Default - // Replace GOPATH with our cache dir - ctx.GOPATH = cachedir - return &SourceMgr{ cachedir: cachedir, - pms: make(map[ProjectRoot]*pmState), - ctx: ctx, + srcs: make(map[string]source), an: an, + dxt: pathDeducerTrie(), + rootxt: newProjectRootTrie(), }, nil } // Release lets go of any locks held by the SourceManager. func (sm *SourceMgr) Release() { - os.Remove(path.Join(sm.cachedir, "sm.lock")) + os.Remove(filepath.Join(sm.cachedir, "sm.lock")) } // AnalyzerInfo reports the name and version of the injected ProjectAnalyzer. @@ -148,30 +145,31 @@ func (sm *SourceMgr) AnalyzerInfo() (name string, version *semver.Version) { return sm.an.Info() } -// GetManifestAndLock returns manifest and lock information for the provided import -// path. gps currently requires that projects be rooted at their repository -// root, which means that this ProjectRoot must also be a repository root. +// GetManifestAndLock returns manifest and lock information for the provided +// import path. 
gps currently requires that projects be rooted at their +// repository root, necessitating that the ProjectIdentifier's ProjectRoot must +// also be a repository root. // // The work of producing the manifest and lock is delegated to the injected // ProjectAnalyzer's DeriveManifestAndLock() method. -func (sm *SourceMgr) GetManifestAndLock(n ProjectRoot, v Version) (Manifest, Lock, error) { - pmc, err := sm.getProjectManager(n) +func (sm *SourceMgr) GetManifestAndLock(id ProjectIdentifier, v Version) (Manifest, Lock, error) { + src, err := sm.getSourceFor(id) if err != nil { return nil, nil, err } - return pmc.pm.GetInfoAt(v) + return src.getManifestAndLock(id.ProjectRoot, v) } -// ListPackages retrieves a tree of the Go packages at or below the provided -// import path, at the provided version. -func (sm *SourceMgr) ListPackages(n ProjectRoot, v Version) (PackageTree, error) { - pmc, err := sm.getProjectManager(n) +// ListPackages parses the tree of the Go packages at and below the ProjectRoot +// of the given ProjectIdentifier, at the given version. +func (sm *SourceMgr) ListPackages(id ProjectIdentifier, v Version) (PackageTree, error) { + src, err := sm.getSourceFor(id) if err != nil { return PackageTree{}, err } - return pmc.pm.ListPackages(v) + return src.listPackages(id.ProjectRoot, v) } // ListVersions retrieves a list of the available versions for a given @@ -182,133 +180,219 @@ func (sm *SourceMgr) ListPackages(n ProjectRoot, v Version) (PackageTree, error) // expected that the caller either not care about order, or sort the result // themselves. // -// This list is always retrieved from upstream; if upstream is not accessible -// (network outage, access issues, or the resource actually went away), an error -// will be returned. -func (sm *SourceMgr) ListVersions(n ProjectRoot) ([]Version, error) { - pmc, err := sm.getProjectManager(n) +// This list is always retrieved from upstream on the first call. 
Subsequent +// calls will return a cached version of the first call's results. if upstream +// is not accessible (network outage, access issues, or the resource actually +// went away), an error will be returned. +func (sm *SourceMgr) ListVersions(id ProjectIdentifier) ([]Version, error) { + src, err := sm.getSourceFor(id) if err != nil { // TODO(sdboyer) More-er proper-er errors return nil, err } - return pmc.pm.ListVersions() + return src.listVersions() } // RevisionPresentIn indicates whether the provided Revision is present in the given // repository. -func (sm *SourceMgr) RevisionPresentIn(n ProjectRoot, r Revision) (bool, error) { - pmc, err := sm.getProjectManager(n) +func (sm *SourceMgr) RevisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) { + src, err := sm.getSourceFor(id) if err != nil { // TODO(sdboyer) More-er proper-er errors return false, err } - return pmc.pm.RevisionPresentIn(r) + return src.revisionPresentIn(r) } -// RepoExists checks if a repository exists, either upstream or in the cache, -// for the provided ProjectRoot. -func (sm *SourceMgr) RepoExists(n ProjectRoot) (bool, error) { - pms, err := sm.getProjectManager(n) +// SourceExists checks if a repository exists, either upstream or in the cache, +// for the provided ProjectIdentifier. +func (sm *SourceMgr) SourceExists(id ProjectIdentifier) (bool, error) { + src, err := sm.getSourceFor(id) if err != nil { return false, err } - return pms.pm.CheckExistence(existsInCache) || pms.pm.CheckExistence(existsUpstream), nil + return src.checkExistence(existsInCache) || src.checkExistence(existsUpstream), nil } -// ExportProject writes out the tree of the provided import path, at the -// provided version, to the provided directory. 
-func (sm *SourceMgr) ExportProject(n ProjectRoot, v Version, to string) error { - pms, err := sm.getProjectManager(n) +// ExportProject writes out the tree of the provided ProjectIdentifier's +// ProjectRoot, at the provided version, to the provided directory. +func (sm *SourceMgr) ExportProject(id ProjectIdentifier, v Version, to string) error { + src, err := sm.getSourceFor(id) if err != nil { return err } - return pms.pm.ExportVersionTo(v, to) + return src.exportVersionTo(v, to) } -// getProjectManager gets the project manager for the given ProjectRoot. +// DeduceRootProject takes an import path and deduces the corresponding +// project/source root. // -// If no such manager yet exists, it attempts to create one. -func (sm *SourceMgr) getProjectManager(n ProjectRoot) (*pmState, error) { - // Check pm cache and errcache first - if pm, exists := sm.pms[n]; exists { - return pm, nil - //} else if pme, errexists := sm.pme[name]; errexists { - //return nil, pme +// Note that some import paths may require network activity to correctly +// determine the root of the path, such as, but not limited to, vanity import +// paths. (A special exception is written for gopkg.in to minimize network +// activity, as its behavior is well-structured) +func (sm *SourceMgr) DeduceProjectRoot(ip string) (ProjectRoot, error) { + if prefix, root, has := sm.rootxt.LongestPrefix(ip); has { + // The non-matching tail of the import path could still be malformed. 
+ // Validate just that part, if it exists + if prefix != ip { + if !pathvld.MatchString(strings.TrimPrefix(ip, prefix)) { + return "", fmt.Errorf("%q is not a valid import path", ip) + } + // There was one, and it validated fine - add it so we don't have to + // revalidate it later + sm.rootxt.Insert(ip, root) + } + return root, nil } - repodir := path.Join(sm.cachedir, "src", string(n)) - // TODO(sdboyer) be more robust about this - r, err := vcs.NewRepo("https://"+string(n), repodir) + rootf, _, err := sm.deducePathAndProcess(ip) if err != nil { - // TODO(sdboyer) be better - return nil, err + return "", err } - if !r.CheckLocal() { - // TODO(sdboyer) cloning the repo here puts it on a blocking, and possibly - // unnecessary path. defer it - err = r.Get() - if err != nil { - // TODO(sdboyer) be better - return nil, err - } + + r, err := rootf() + return ProjectRoot(r), err +} + +func (sm *SourceMgr) getSourceFor(id ProjectIdentifier) (source, error) { + nn := id.netName() + + sm.srcmut.RLock() + src, has := sm.srcs[nn] + sm.srcmut.RUnlock() + if has { + return src, nil } - // Ensure cache dir exists - metadir := path.Join(sm.cachedir, "metadata", string(n)) - err = os.MkdirAll(metadir, 0777) + _, srcf, err := sm.deducePathAndProcess(nn) if err != nil { - // TODO(sdboyer) be better return nil, err } - pms := &pmState{} - cpath := path.Join(metadir, "cache.json") - fi, err := os.Stat(cpath) - var dc *projectDataCache - if fi != nil { - pms.cf, err = os.OpenFile(cpath, os.O_RDWR, 0777) - if err != nil { - // TODO(sdboyer) be better - return nil, fmt.Errorf("Err on opening metadata cache file: %s", err) - } + // we don't care about the ident here, and the future produced by + // deducePathAndProcess will dedupe with what's in the sm.srcs map + src, _, err = srcf() + return src, err +} - err = json.NewDecoder(pms.cf).Decode(dc) - if err != nil { - // TODO(sdboyer) be better - return nil, fmt.Errorf("Err on JSON decoding metadata cache file: %s", err) - } - } else { - 
// TODO(sdboyer) commented this out for now, until we manage it correctly - //pms.cf, err = os.Create(cpath) - //if err != nil { - //// TODO(sdboyer) be better - //return nil, fmt.Errorf("Err on creating metadata cache file: %s", err) - //} - - dc = &projectDataCache{ - Infos: make(map[Revision]projectInfo), - Packages: make(map[Revision]PackageTree), - VMap: make(map[Version]Revision), - RMap: make(map[Revision][]Version), +func (sm *SourceMgr) deducePathAndProcess(path string) (stringFuture, sourceFuture, error) { + df, err := sm.deduceFromPath(path) + if err != nil { + return nil, nil, err + } + + var rstart, sstart int32 + rc, sc := make(chan struct{}, 1), make(chan struct{}, 1) + + // Rewrap in a deferred future, so the caller can decide when to trigger it + rootf := func() (pr string, err error) { + // CAS because a bad interleaving here would panic on double-closing rc + if atomic.CompareAndSwapInt32(&rstart, 0, 1) { + go func() { + defer close(rc) + pr, err = df.root() + if err != nil { + // Don't cache errs. This doesn't really hurt the solver, and is + // beneficial for other use cases because it means we don't have to + // expose any kind of controls for clearing caches. + return + } + + tpr := ProjectRoot(pr) + sm.rootxt.Insert(pr, tpr) + // It's not harmful if the netname was a URL rather than an + // import path + if pr != path { + // Insert the result into the rootxt twice - once at the + // root itself, so as to catch siblings/relatives, and again + // at the exact provided import path (assuming they were + // different), so that on subsequent calls, exact matches + // can skip the regex above. 
+ sm.rootxt.Insert(path, tpr) + } + }() } + + <-rc + return pr, err } - pm := &projectManager{ - n: n, - ctx: sm.ctx, - an: sm.an, - dc: dc, - crepo: &repo{ - rpath: repodir, - r: r, - }, + // Now, handle the source + fut := df.psf(sm.cachedir, sm.an) + + // Rewrap in a deferred future, so the caller can decide when to trigger it + srcf := func() (src source, ident string, err error) { + // CAS because a bad interleaving here would panic on double-closing sc + if atomic.CompareAndSwapInt32(&sstart, 0, 1) { + go func() { + defer close(sc) + src, ident, err = fut() + if err != nil { + // Don't cache errs. This doesn't really hurt the solver, and is + // beneficial for other use cases because it means we don't have + // to expose any kind of controls for clearing caches. + return + } + + sm.srcmut.Lock() + defer sm.srcmut.Unlock() + + // Check to make sure a source hasn't shown up in the meantime, or that + // there wasn't already one at the ident. + var hasi, hasp bool + var srci, srcp source + if ident != "" { + srci, hasi = sm.srcs[ident] + } + srcp, hasp = sm.srcs[path] + + // if neither the ident nor the input path have an entry for this src, + // we're in the simple case - write them both in and we're done + if !hasi && !hasp { + sm.srcs[path] = src + if ident != path && ident != "" { + sm.srcs[ident] = src + } + return + } + + // Now, the xors. + // + // If already present for ident but not for path, copy ident's src + // to path. This covers cases like a gopkg.in path referring back + // onto a github repository, where something else already explicitly + // looked up that same gh repo. + if hasi && !hasp { + sm.srcs[path] = srci + src = srci + } + // If already present for path but not for ident, do NOT copy path's + // src to ident, but use the returned one instead. Really, this case + // shouldn't occur at all...? 
But the crucial thing is that the + // path-based one has already discovered what actual ident of source + // they want to use, and changing that arbitrarily would have + // undefined effects. + if hasp && !hasi && ident != "" { + sm.srcs[ident] = src + } + + // If both are present, then assume we're good, and use the path one + if hasp && hasi { + // TODO(sdboyer) compare these (somehow? reflect? pointer?) and if they're not the + // same object, panic + src = srcp + } + }() + } + + <-sc + return } - pms.pm = pm - sm.pms[n] = pms - return pms, nil + return rootf, srcf, nil } diff --git a/source_test.go b/source_test.go new file mode 100644 index 0000000..907d9c3 --- /dev/null +++ b/source_test.go @@ -0,0 +1,319 @@ +package gps + +import ( + "io/ioutil" + "net/url" + "reflect" + "sort" + "testing" +) + +func TestGitSourceInteractions(t *testing.T) { + // This test is slowish, skip it on -short + if testing.Short() { + t.Skip("Skipping git source version fetching test in short mode") + } + + cpath, err := ioutil.TempDir("", "smcache") + if err != nil { + t.Errorf("Failed to create temp dir: %s", err) + } + rf := func() { + err := removeAll(cpath) + if err != nil { + t.Errorf("removeAll failed: %s", err) + } + } + + n := "github.com/Masterminds/VCSTestRepo" + un := "https://" + n + u, err := url.Parse(un) + if err != nil { + t.Errorf("URL was bad, lolwut? 
errtext: %s", err) + rf() + t.FailNow() + } + mb := maybeGitSource{ + url: u, + } + + isrc, ident, err := mb.try(cpath, naiveAnalyzer{}) + if err != nil { + t.Errorf("Unexpected error while setting up gitSource for test repo: %s", err) + rf() + t.FailNow() + } + src, ok := isrc.(*gitSource) + if !ok { + t.Errorf("Expected a gitSource, got a %T", isrc) + rf() + t.FailNow() + } + if ident != un { + t.Errorf("Expected %s as source ident, got %s", un, ident) + } + + vlist, err := src.listVersions() + if err != nil { + t.Errorf("Unexpected error getting version pairs from git repo: %s", err) + rf() + t.FailNow() + } + + if src.ex.s&existsUpstream != existsUpstream { + t.Errorf("gitSource.listVersions() should have set the upstream existence bit for search") + } + if src.ex.f&existsUpstream != existsUpstream { + t.Errorf("gitSource.listVersions() should have set the upstream existence bit for found") + } + if src.ex.s&existsInCache != 0 { + t.Errorf("gitSource.listVersions() should not have set the cache existence bit for search") + } + if src.ex.f&existsInCache != 0 { + t.Errorf("gitSource.listVersions() should not have set the cache existence bit for found") + } + + // check that an expected rev is present + is, err := src.revisionPresentIn(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")) + if err != nil { + t.Errorf("Unexpected error while checking revision presence: %s", err) + } else if !is { + t.Errorf("Revision that should exist was not present") + } + + if len(vlist) != 3 { + t.Errorf("git test repo should've produced three versions, got %v: vlist was %s", len(vlist), vlist) + } else { + sort.Sort(upgradeVersionSorter(vlist)) + evl := []Version{ + NewVersion("1.0.0").Is(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")), + NewBranch("master").Is(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")), + NewBranch("test").Is(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")), + } + if !reflect.DeepEqual(vlist, evl) { + t.Errorf("Version list was not what 
we expected:\n\t(GOT): %s\n\t(WNT): %s", vlist, evl) + } + } + + // recheck that rev is present, this time interacting with cache differently + is, err = src.revisionPresentIn(Revision("30605f6ac35fcb075ad0bfa9296f90a7d891523e")) + if err != nil { + t.Errorf("Unexpected error while re-checking revision presence: %s", err) + } else if !is { + t.Errorf("Revision that should exist was not present on re-check") + } +} + +func TestBzrSourceInteractions(t *testing.T) { + // This test is quite slow (ugh bzr), so skip it on -short + if testing.Short() { + t.Skip("Skipping bzr source version fetching test in short mode") + } + + cpath, err := ioutil.TempDir("", "smcache") + if err != nil { + t.Errorf("Failed to create temp dir: %s", err) + } + rf := func() { + err := removeAll(cpath) + if err != nil { + t.Errorf("removeAll failed: %s", err) + } + } + + n := "launchpad.net/govcstestbzrrepo" + un := "https://" + n + u, err := url.Parse(un) + if err != nil { + t.Errorf("URL was bad, lolwut? errtext: %s", err) + rf() + t.FailNow() + } + mb := maybeBzrSource{ + url: u, + } + + isrc, ident, err := mb.try(cpath, naiveAnalyzer{}) + if err != nil { + t.Errorf("Unexpected error while setting up bzrSource for test repo: %s", err) + rf() + t.FailNow() + } + src, ok := isrc.(*bzrSource) + if !ok { + t.Errorf("Expected a bzrSource, got a %T", isrc) + rf() + t.FailNow() + } + if ident != un { + t.Errorf("Expected %s as source ident, got %s", un, ident) + } + + // check that an expected rev is present + is, err := src.revisionPresentIn(Revision("matt@mattfarina.com-20150731135137-pbphasfppmygpl68")) + if err != nil { + t.Errorf("Unexpected error while checking revision presence: %s", err) + } else if !is { + t.Errorf("Revision that should exist was not present") + } + + vlist, err := src.listVersions() + if err != nil { + t.Errorf("Unexpected error getting version pairs from bzr repo: %s", err) + } + + if src.ex.s&existsUpstream|existsInCache != existsUpstream|existsInCache { + 
t.Errorf("bzrSource.listVersions() should have set the upstream and cache existence bits for search") + } + if src.ex.f&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("bzrSource.listVersions() should have set the upstream and cache existence bits for found") + } + + if len(vlist) != 1 { + t.Errorf("bzr test repo should've produced one version, got %v", len(vlist)) + } else { + v := NewVersion("1.0.0").Is(Revision("matt@mattfarina.com-20150731135137-pbphasfppmygpl68")) + if vlist[0] != v { + t.Errorf("bzr pair fetch reported incorrect first version, got %s", vlist[0]) + } + } + + // Run again, this time to ensure cache outputs correctly + vlist, err = src.listVersions() + if err != nil { + t.Errorf("Unexpected error getting version pairs from bzr repo: %s", err) + } + + if src.ex.s&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("bzrSource.listVersions() should have set the upstream and cache existence bits for search") + } + if src.ex.f&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("bzrSource.listVersions() should have set the upstream and cache existence bits for found") + } + + if len(vlist) != 1 { + t.Errorf("bzr test repo should've produced one version, got %v", len(vlist)) + } else { + v := NewVersion("1.0.0").Is(Revision("matt@mattfarina.com-20150731135137-pbphasfppmygpl68")) + if vlist[0] != v { + t.Errorf("bzr pair fetch reported incorrect first version, got %s", vlist[0]) + } + } + + // recheck that rev is present, this time interacting with cache differently + is, err = src.revisionPresentIn(Revision("matt@mattfarina.com-20150731135137-pbphasfppmygpl68")) + if err != nil { + t.Errorf("Unexpected error while re-checking revision presence: %s", err) + } else if !is { + t.Errorf("Revision that should exist was not present on re-check") + } +} + +func TestHgSourceInteractions(t *testing.T) { + // This test is slow, so skip it on -short + if testing.Short() { + t.Skip("Skipping hg 
source version fetching test in short mode") + } + + cpath, err := ioutil.TempDir("", "smcache") + if err != nil { + t.Errorf("Failed to create temp dir: %s", err) + } + rf := func() { + err := removeAll(cpath) + if err != nil { + t.Errorf("removeAll failed: %s", err) + } + } + + n := "bitbucket.org/mattfarina/testhgrepo" + un := "https://" + n + u, err := url.Parse(un) + if err != nil { + t.Errorf("URL was bad, lolwut? errtext: %s", err) + rf() + t.FailNow() + } + mb := maybeHgSource{ + url: u, + } + + isrc, ident, err := mb.try(cpath, naiveAnalyzer{}) + if err != nil { + t.Errorf("Unexpected error while setting up hgSource for test repo: %s", err) + rf() + t.FailNow() + } + src, ok := isrc.(*hgSource) + if !ok { + t.Errorf("Expected a hgSource, got a %T", isrc) + rf() + t.FailNow() + } + if ident != un { + t.Errorf("Expected %s as source ident, got %s", un, ident) + } + + // check that an expected rev is present + is, err := src.revisionPresentIn(Revision("d680e82228d206935ab2eaa88612587abe68db07")) + if err != nil { + t.Errorf("Unexpected error while checking revision presence: %s", err) + } else if !is { + t.Errorf("Revision that should exist was not present") + } + + vlist, err := src.listVersions() + if err != nil { + t.Errorf("Unexpected error getting version pairs from hg repo: %s", err) + } + evl := []Version{ + NewVersion("1.0.0").Is(Revision("d680e82228d206935ab2eaa88612587abe68db07")), + NewBranch("test").Is(Revision("6c44ee3fe5d87763616c19bf7dbcadb24ff5a5ce")), + } + + if src.ex.s&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("hgSource.listVersions() should have set the upstream and cache existence bits for search") + } + if src.ex.f&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("hgSource.listVersions() should have set the upstream and cache existence bits for found") + } + + if len(vlist) != 2 { + t.Errorf("hg test repo should've produced one version, got %v", len(vlist)) + } else { + 
sort.Sort(upgradeVersionSorter(vlist)) + if !reflect.DeepEqual(vlist, evl) { + t.Errorf("Version list was not what we expected:\n\t(GOT): %s\n\t(WNT): %s", vlist, evl) + } + } + + // Run again, this time to ensure cache outputs correctly + vlist, err = src.listVersions() + if err != nil { + t.Errorf("Unexpected error getting version pairs from hg repo: %s", err) + } + + if src.ex.s&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("hgSource.listVersions() should have set the upstream and cache existence bits for search") + } + if src.ex.f&existsUpstream|existsInCache != existsUpstream|existsInCache { + t.Errorf("hgSource.listVersions() should have set the upstream and cache existence bits for found") + } + + if len(vlist) != 2 { + t.Errorf("hg test repo should've produced one version, got %v", len(vlist)) + } else { + sort.Sort(upgradeVersionSorter(vlist)) + if !reflect.DeepEqual(vlist, evl) { + t.Errorf("Version list was not what we expected:\n\t(GOT): %s\n\t(WNT): %s", vlist, evl) + } + } + + // recheck that rev is present, this time interacting with cache differently + is, err = src.revisionPresentIn(Revision("d680e82228d206935ab2eaa88612587abe68db07")) + if err != nil { + t.Errorf("Unexpected error while re-checking revision presence: %s", err) + } else if !is { + t.Errorf("Revision that should exist was not present on re-check") + } +} diff --git a/typed_radix.go b/typed_radix.go new file mode 100644 index 0000000..9f56a9b --- /dev/null +++ b/typed_radix.go @@ -0,0 +1,151 @@ +package gps + +import ( + "strings" + + "github.com/armon/go-radix" +) + +// Typed implementations of radix trees. These are just simple wrappers that let +// us avoid having to type assert anywhere else, cleaning up other code a bit. +// +// Some of the more annoying things to implement (like walks) aren't +// implemented. They can be added if/when we actually need them. +// +// Oh generics, where art thou... 
+ +type deducerTrie struct { + t *radix.Tree +} + +func newDeducerTrie() deducerTrie { + return deducerTrie{ + t: radix.New(), + } +} + +// Delete is used to delete a key, returning the previous value and if it was deleted +func (t deducerTrie) Delete(s string) (pathDeducer, bool) { + if v, had := t.t.Delete(s); had { + return v.(pathDeducer), had + } + return nil, false +} + +// Get is used to lookup a specific key, returning the value and if it was found +func (t deducerTrie) Get(s string) (pathDeducer, bool) { + if v, has := t.t.Get(s); has { + return v.(pathDeducer), has + } + return nil, false +} + +// Insert is used to add a newentry or update an existing entry. Returns if updated. +func (t deducerTrie) Insert(s string, v pathDeducer) (pathDeducer, bool) { + if v2, had := t.t.Insert(s, v); had { + return v2.(pathDeducer), had + } + return nil, false +} + +// Len is used to return the number of elements in the tree +func (t deducerTrie) Len() int { + return t.t.Len() +} + +// LongestPrefix is like Get, but instead of an exact match, it will return the +// longest prefix match. +func (t deducerTrie) LongestPrefix(s string) (string, pathDeducer, bool) { + if p, v, has := t.t.LongestPrefix(s); has { + return p, v.(pathDeducer), has + } + return "", nil, false +} + +// ToMap is used to walk the tree and convert it to a map. 
+func (t deducerTrie) ToMap() map[string]pathDeducer { + m := make(map[string]pathDeducer) + t.t.Walk(func(s string, v interface{}) bool { + m[s] = v.(pathDeducer) + return false + }) + + return m +} + +type prTrie struct { + t *radix.Tree +} + +func newProjectRootTrie() prTrie { + return prTrie{ + t: radix.New(), + } +} + +// Delete is used to delete a key, returning the previous value and if it was deleted +func (t prTrie) Delete(s string) (ProjectRoot, bool) { + if v, had := t.t.Delete(s); had { + return v.(ProjectRoot), had + } + return "", false +} + +// Get is used to lookup a specific key, returning the value and if it was found +func (t prTrie) Get(s string) (ProjectRoot, bool) { + if v, has := t.t.Get(s); has { + return v.(ProjectRoot), has + } + return "", false +} + +// Insert is used to add a newentry or update an existing entry. Returns if updated. +func (t prTrie) Insert(s string, v ProjectRoot) (ProjectRoot, bool) { + if v2, had := t.t.Insert(s, v); had { + return v2.(ProjectRoot), had + } + return "", false +} + +// Len is used to return the number of elements in the tree +func (t prTrie) Len() int { + return t.t.Len() +} + +// LongestPrefix is like Get, but instead of an exact match, it will return the +// longest prefix match. +func (t prTrie) LongestPrefix(s string) (string, ProjectRoot, bool) { + if p, v, has := t.t.LongestPrefix(s); has && isPathPrefixOrEqual(p, s) { + return p, v.(ProjectRoot), has + } + return "", "", false +} + +// ToMap is used to walk the tree and convert it to a map. +func (t prTrie) ToMap() map[string]ProjectRoot { + m := make(map[string]ProjectRoot) + t.t.Walk(func(s string, v interface{}) bool { + m[s] = v.(ProjectRoot) + return false + }) + + return m +} + +// isPathPrefixOrEqual is an additional helper check to ensure that the literal +// string prefix returned from a radix tree prefix match is also a tree match. 
+// +// The radix tree gets it mostly right, but we have to guard against +// possibilities like this: +// +// github.com/sdboyer/foo +// github.com/sdboyer/foobar/baz +// +// The latter would incorrectly be conflated with the former. As we know we're +// operating on strings that describe paths, guard against this case by +// verifying that either the input is the same length as the match (in which +// case we know they're equal), or that the next character is a "/". +func isPathPrefixOrEqual(pre, path string) bool { + prflen := len(pre) + return prflen == len(path) || strings.Index(path[:prflen], "/") == 0 +} diff --git a/vcs_source.go b/vcs_source.go new file mode 100644 index 0000000..277b1db --- /dev/null +++ b/vcs_source.go @@ -0,0 +1,439 @@ +package gps + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + + "github.com/Masterminds/vcs" + "github.com/termie/go-shutil" +) + +type vcsSource interface { + syncLocal() error + ensureLocal() error + listLocalVersionPairs() ([]PairedVersion, sourceExistence, error) + listUpstreamVersionPairs() ([]PairedVersion, sourceExistence, error) + hasRevision(Revision) (bool, error) + checkout(Version) error + exportVersionTo(Version, string) error +} + +// gitSource is a generic git repository implementation that should work with +// all standard git remotes. 
+type gitSource struct { + baseVCSSource +} + +func (s *gitSource) exportVersionTo(v Version, to string) error { + s.crepo.mut.Lock() + defer s.crepo.mut.Unlock() + + r := s.crepo.r + if !r.CheckLocal() { + err := r.Get() + if err != nil { + return fmt.Errorf("failed to clone repo from %s", r.Remote()) + } + } + // Back up original index + idx, bak := filepath.Join(r.LocalPath(), ".git", "index"), filepath.Join(r.LocalPath(), ".git", "origindex") + err := os.Rename(idx, bak) + if err != nil { + return err + } + + // TODO(sdboyer) could have an err here + defer os.Rename(bak, idx) + + vstr := v.String() + if rv, ok := v.(PairedVersion); ok { + vstr = rv.Underlying().String() + } + _, err = r.RunFromDir("git", "read-tree", vstr) + if err != nil { + return err + } + + // Ensure we have exactly one trailing slash + to = strings.TrimSuffix(to, string(os.PathSeparator)) + string(os.PathSeparator) + // Checkout from our temporary index to the desired target location on disk; + // now it's git's job to make it fast. Sadly, this approach *does* also + // write out vendor dirs. There doesn't appear to be a way to make + // checkout-index respect sparse checkout rules (-a supercedes it); + // the alternative is using plain checkout, though we have a bunch of + // housekeeping to do to set up, then tear down, the sparse checkout + // controls, as well as restore the original index and HEAD. 
+ _, err = r.RunFromDir("git", "checkout-index", "-a", "--prefix="+to) + return err +} + +func (s *gitSource) listVersions() (vlist []Version, err error) { + if s.cvsync { + vlist = make([]Version, len(s.dc.vMap)) + k := 0 + for v, r := range s.dc.vMap { + vlist[k] = v.Is(r) + k++ + } + + return + } + + r := s.crepo.r + var out []byte + c := exec.Command("git", "ls-remote", r.Remote()) + // Ensure no terminal prompting for PWs + c.Env = mergeEnvLists([]string{"GIT_TERMINAL_PROMPT=0"}, os.Environ()) + out, err = c.CombinedOutput() + + all := bytes.Split(bytes.TrimSpace(out), []byte("\n")) + if err != nil || len(all) == 0 { + // TODO(sdboyer) remove this path? it really just complicates things, for + // probably not much benefit + + // ls-remote failed, probably due to bad communication or a faulty + // upstream implementation. So fetch updates, then build the list + // locally + s.crepo.mut.Lock() + err = r.Update() + s.crepo.mut.Unlock() + if err != nil { + // Definitely have a problem, now - bail out + return + } + + // Upstream and cache must exist for this to have worked, so add that to + // searched and found + s.ex.s |= existsUpstream | existsInCache + s.ex.f |= existsUpstream | existsInCache + // Also, local is definitely now synced + s.crepo.synced = true + + s.crepo.mut.RLock() + out, err = r.RunFromDir("git", "show-ref", "--dereference") + s.crepo.mut.RUnlock() + if err != nil { + // TODO(sdboyer) More-er proper-er error + return + } + + all = bytes.Split(bytes.TrimSpace(out), []byte("\n")) + if len(all) == 0 { + return nil, fmt.Errorf("no versions available for %s (this is weird)", r.Remote()) + } + } + + // Local cache may not actually exist here, but upstream definitely does + s.ex.s |= existsUpstream + s.ex.f |= existsUpstream + + smap := make(map[string]bool) + uniq := 0 + vlist = make([]Version, len(all)-1) // less 1, because always ignore HEAD + for _, pair := range all { + var v PairedVersion + if string(pair[46:51]) == "heads" { + v = 
NewBranch(string(pair[52:])).Is(Revision(pair[:40])).(PairedVersion) + vlist[uniq] = v + uniq++ + } else if string(pair[46:50]) == "tags" { + vstr := string(pair[51:]) + if strings.HasSuffix(vstr, "^{}") { + // If the suffix is there, then we *know* this is the rev of + // the underlying commit object that we actually want + vstr = strings.TrimSuffix(vstr, "^{}") + } else if smap[vstr] { + // Already saw the deref'd version of this tag, if one + // exists, so skip this. + continue + // Can only hit this branch if we somehow got the deref'd + // version first. Which should be impossible, but this + // covers us in case of weirdness, anyway. + } + v = NewVersion(vstr).Is(Revision(pair[:40])).(PairedVersion) + smap[vstr] = true + vlist[uniq] = v + uniq++ + } + } + + // Trim off excess from the slice + vlist = vlist[:uniq] + + // Process the version data into the cache + // + // reset the rmap and vmap, as they'll be fully repopulated by this + // TODO(sdboyer) detect out-of-sync pairings as we do this? + s.dc.vMap = make(map[UnpairedVersion]Revision) + s.dc.rMap = make(map[Revision][]UnpairedVersion) + + for _, v := range vlist { + pv := v.(PairedVersion) + u, r := pv.Unpair(), pv.Underlying() + s.dc.vMap[u] = r + s.dc.rMap[r] = append(s.dc.rMap[r], u) + } + // Mark the cache as being in sync with upstream's version list + s.cvsync = true + return +} + +// bzrSource is a generic bzr repository implementation that should work with +// all standard bazaar remotes. 
+type bzrSource struct { + baseVCSSource +} + +func (s *bzrSource) listVersions() (vlist []Version, err error) { + if s.cvsync { + vlist = make([]Version, len(s.dc.vMap)) + k := 0 + for v, r := range s.dc.vMap { + vlist[k] = v.Is(r) + k++ + } + + return + } + + // Must first ensure cache checkout's existence + err = s.ensureCacheExistence() + if err != nil { + return + } + r := s.crepo.r + + // Local repo won't have all the latest refs if ensureCacheExistence() + // didn't create it + if !s.crepo.synced { + s.crepo.mut.Lock() + err = r.Update() + s.crepo.mut.Unlock() + if err != nil { + return + } + + s.crepo.synced = true + } + + var out []byte + + // Now, list all the tags + out, err = r.RunFromDir("bzr", "tags", "--show-ids", "-v") + if err != nil { + return + } + + all := bytes.Split(bytes.TrimSpace(out), []byte("\n")) + + // reset the rmap and vmap, as they'll be fully repopulated by this + // TODO(sdboyer) detect out-of-sync pairings as we do this? + s.dc.vMap = make(map[UnpairedVersion]Revision) + s.dc.rMap = make(map[Revision][]UnpairedVersion) + + vlist = make([]Version, len(all)) + k := 0 + for _, line := range all { + idx := bytes.IndexByte(line, 32) // space + v := NewVersion(string(line[:idx])) + r := Revision(bytes.TrimSpace(line[idx:])) + + s.dc.vMap[v] = r + s.dc.rMap[r] = append(s.dc.rMap[r], v) + vlist[k] = v.Is(r) + k++ + } + + // Cache is now in sync with upstream's version list + s.cvsync = true + return +} + +// hgSource is a generic hg repository implementation that should work with +// all standard mercurial servers. 
+type hgSource struct { + baseVCSSource +} + +func (s *hgSource) listVersions() (vlist []Version, err error) { + if s.cvsync { + vlist = make([]Version, len(s.dc.vMap)) + k := 0 + for v, r := range s.dc.vMap { + vlist[k] = v.Is(r) + k++ + } + + return + } + + // Must first ensure cache checkout's existence + err = s.ensureCacheExistence() + if err != nil { + return + } + r := s.crepo.r + + // Local repo won't have all the latest refs if ensureCacheExistence() + // didn't create it + if !s.crepo.synced { + s.crepo.mut.Lock() + err = r.Update() + s.crepo.mut.Unlock() + if err != nil { + return + } + + s.crepo.synced = true + } + + var out []byte + + // Now, list all the tags + out, err = r.RunFromDir("hg", "tags", "--debug", "--verbose") + if err != nil { + return + } + + all := bytes.Split(bytes.TrimSpace(out), []byte("\n")) + lbyt := []byte("local") + nulrev := []byte("0000000000000000000000000000000000000000") + for _, line := range all { + if bytes.Equal(lbyt, line[len(line)-len(lbyt):]) { + // Skip local tags + continue + } + + // tip is magic, don't include it + if bytes.HasPrefix(line, []byte("tip")) { + continue + } + + // Split on colon; this gets us the rev and the tag plus local revno + pair := bytes.Split(line, []byte(":")) + if bytes.Equal(nulrev, pair[1]) { + // null rev indicates this tag is marked for deletion + continue + } + + idx := bytes.IndexByte(pair[0], 32) // space + v := NewVersion(string(pair[0][:idx])).Is(Revision(pair[1])).(PairedVersion) + vlist = append(vlist, v) + } + + out, err = r.RunFromDir("hg", "branches", "--debug", "--verbose") + if err != nil { + // better nothing than partial and misleading + vlist = nil + return + } + + all = bytes.Split(bytes.TrimSpace(out), []byte("\n")) + lbyt = []byte("(inactive)") + for _, line := range all { + if bytes.Equal(lbyt, line[len(line)-len(lbyt):]) { + // Skip inactive branches + continue + } + + // Split on colon; this gets us the rev and the branch plus local revno + pair := bytes.Split(line, 
[]byte(":")) + idx := bytes.IndexByte(pair[0], 32) // space + v := NewBranch(string(pair[0][:idx])).Is(Revision(pair[1])).(PairedVersion) + vlist = append(vlist, v) + } + + // reset the rmap and vmap, as they'll be fully repopulated by this + // TODO(sdboyer) detect out-of-sync pairings as we do this? + s.dc.vMap = make(map[UnpairedVersion]Revision) + s.dc.rMap = make(map[Revision][]UnpairedVersion) + + for _, v := range vlist { + pv := v.(PairedVersion) + u, r := pv.Unpair(), pv.Underlying() + s.dc.vMap[u] = r + s.dc.rMap[r] = append(s.dc.rMap[r], u) + } + + // Cache is now in sync with upstream's version list + s.cvsync = true + return +} + +type repo struct { + // Path to the root of the default working copy (NOT the repo itself) + rpath string + + // Mutex controlling general access to the repo + mut sync.RWMutex + + // Object for direct repo interaction + r vcs.Repo + + // Whether or not the cache repo is in sync (think dvcs) with upstream + synced bool +} + +func (r *repo) exportVersionTo(v Version, to string) error { + r.mut.Lock() + defer r.mut.Unlock() + + // TODO(sdboyer) This is a dumb, slow approach, but we're punting on making + // these fast for now because git is the OVERWHELMING case (it's handled in + // its own method) + r.r.UpdateVersion(v.String()) + + cfg := &shutil.CopyTreeOptions{ + Symlinks: true, + CopyFunction: shutil.Copy, + Ignore: func(src string, contents []os.FileInfo) (ignore []string) { + for _, fi := range contents { + if !fi.IsDir() { + continue + } + n := fi.Name() + switch n { + case "vendor", ".bzr", ".svn", ".hg": + ignore = append(ignore, n) + } + } + + return + }, + } + + return shutil.CopyTree(r.rpath, to, cfg) +} + +// This func copied from Masterminds/vcs so we can exec our own commands +func mergeEnvLists(in, out []string) []string { +NextVar: + for _, inkv := range in { + k := strings.SplitAfterN(inkv, "=", 2)[0] + for i, outkv := range out { + if strings.HasPrefix(outkv, k) { + out[i] = inkv + continue NextVar + } + } 
+ out = append(out, inkv) + } + return out +} + +func stripVendor(path string, info os.FileInfo, err error) error { + if info.Name() == "vendor" { + if _, err := os.Lstat(path); err == nil { + if info.IsDir() { + return removeAll(path) + } + } + } + + return nil +} diff --git a/version.go b/version.go index 57d37ec..230e0ca 100644 --- a/version.go +++ b/version.go @@ -16,6 +16,7 @@ import "github.com/Masterminds/semver" // hiding behind the interface. type Version interface { Constraint + // Indicates the type of version - Revision, Branch, Version, or Semver Type() string } @@ -24,8 +25,15 @@ type Version interface { // underlying Revision. type PairedVersion interface { Version + // Underlying returns the immutable Revision that identifies this Version. Underlying() Revision + + // Unpair returns the surface-level UnpairedVersion that half of the pair. + // + // It does NOT modify the original PairedVersion + Unpair() UnpairedVersion + // Ensures it is impossible to be both a PairedVersion and an // UnpairedVersion _pair(int) @@ -380,6 +388,10 @@ func (v versionPair) Underlying() Revision { return v.r } +func (v versionPair) Unpair() UnpairedVersion { + return v.v +} + func (v versionPair) Matches(v2 Version) bool { switch tv2 := v2.(type) { case versionTypeUnion: diff --git a/version_queue.go b/version_queue.go index e74a1da..7c92253 100644 --- a/version_queue.go +++ b/version_queue.go @@ -40,7 +40,7 @@ func newVersionQueue(id ProjectIdentifier, lockv, prefv Version, b sourceBridge) if len(vq.pi) == 0 { var err error - vq.pi, err = vq.b.listVersions(vq.id) + vq.pi, err = vq.b.ListVersions(vq.id) if err != nil { // TODO(sdboyer) pushing this error this early entails that we // unconditionally deep scan (e.g. vendor), as well as hitting the @@ -86,7 +86,7 @@ func (vq *versionQueue) advance(fail error) (err error) { } vq.allLoaded = true - vq.pi, err = vq.b.listVersions(vq.id) + vq.pi, err = vq.b.ListVersions(vq.id) if err != nil { return err }