Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalise dcs to work on other distros #25

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 6 additions & 21 deletions cmd/compute-ranking/compute-ranking.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@
package main

import (
"compress/bzip2"
"database/sql"
"flag"
"fmt"
_ "github.com/lib/pq"
"github.com/mstap/godebiancontrol"
"github.com/Debian/dcs/utils"
"log"
"os"
"path/filepath"
"strings"
)

var mirrorPath = flag.String("mirror_path",
"/media/sdd1/debian-source-mirror/",
"Path to the debian source mirror (which contains the 'dists' and 'pool' folder)")
// dist selects the distribution to scan; its value is passed to
// utils.MustLoadMirroredControlFile, which looks below "dists/<dist>" in the
// mirror.
var dist = flag.String("dist",
	"sid",
	"The distribution to scan")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of “release”, use “distribution” to stay consistent with the naming throughout DCS.

var dryRun = flag.Bool("dry_run", false, "Don’t actually write anything to the database.")
var verbose = flag.Bool("verbose", false, "Print ranking information about every package")
var popconInstSrc map[string]float32 = make(map[string]float32)
Expand Down Expand Up @@ -57,21 +57,6 @@ func fillPopconInst() {

}

// mustLoadMirroredControlFile reads the bzip2-compressed control file "name"
// from the "dists/sid/main/" directory below *mirrorPath and returns the
// paragraphs parsed from it. A failure to open or to parse the file ends the
// program via log.Fatal.
func mustLoadMirroredControlFile(name string) []godebiancontrol.Paragraph {
	controlPath := filepath.Join(*mirrorPath, "dists/sid/main/", name)

	f, openErr := os.Open(controlPath)
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer f.Close()

	// The control file is stored compressed; decompress while parsing.
	paragraphs, parseErr := godebiancontrol.Parse(bzip2.NewReader(f))
	if parseErr != nil {
		log.Fatal(parseErr)
	}
	return paragraphs
}

func main() {
flag.Parse()

Expand All @@ -94,8 +79,8 @@ func main() {
}
defer update.Close()

sourcePackages := mustLoadMirroredControlFile("source/Sources.bz2")
binaryPackages := mustLoadMirroredControlFile("binary-amd64/Packages.bz2")
sourcePackages := utils.MustLoadMirroredControlFile(*mirrorPath, *dist, "source/Sources.bz2")
binaryPackages := utils.MustLoadMirroredControlFile(*mirrorPath, *dist, "binary-amd64/Packages.bz2")

reverseDeps := make(map[string]uint)
for _, pkg := range binaryPackages {
Expand Down
54 changes: 24 additions & 30 deletions cmd/dcs-unpack/unpack.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package main

import (
"compress/bzip2"
"flag"
"fmt"
"github.com/mstap/godebiancontrol"
"github.com/Debian/dcs/utils"
"log"
"os"
"os/exec"
Expand All @@ -22,6 +21,9 @@ var oldUnpackPath = flag.String("old_unpacked_path",
var newUnpackPath = flag.String("new_unpacked_path",
"/dcs-ssd/unpacked-new/",
"Path to the unpacked debian source mirror")
// dist selects the distribution whose source packages are unpacked; its value
// is passed to utils.MustLoadMirroredControlFile, which looks below
// "dists/<dist>" in the mirror.
var dist = flag.String("dist",
	"sid",
	"The distribution to scan")

// Copies directories by hard-linking all files inside,
// necessary since hard-links on directories are not possible.
Expand All @@ -48,17 +50,7 @@ func linkDirectory(oldPath, newPath string) error {
func main() {
flag.Parse()

// Walk through all source packages
file, err := os.Open(path.Join(*mirrorPath, "/dists/sid/main/source/Sources.bz2"))
if err != nil {
log.Fatal(err)
}
defer file.Close()

sourcePackages, err := godebiancontrol.Parse(bzip2.NewReader(file))
if err != nil {
log.Fatal(err)
}
sourcePackages := utils.MustLoadMirroredControlFile(*mirrorPath, *dist, "source/Sources.bz2")

if err := os.Mkdir(*newUnpackPath, 0775); err != nil && !os.IsExist(err) {
log.Fatal(err)
Expand All @@ -75,31 +67,31 @@ func main() {
oldPath := path.Join(*oldUnpackPath, dir)
newPath := path.Join(*newUnpackPath, dir)

// Check whether the directory exists in the old "unpacked" directory
if _, err := os.Stat(oldPath); err == nil {
// Check whether the directory exists in the old "unpacked"
// directory and hardlink only if the new path doesn't exist
// (to avoid wasted time hardlinking in case of partial runs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I don’t like about this is that you need to cancel your runs on “directory boundaries”. I.e., if 3 of 5 files are unpacked, you cancel, the next run will skip the directory even though 2 files are still missing.

Handling this case properly is too complex, so that the entire optimization is not worth it. Can you remove it?

_, oldErr := os.Stat(oldPath)
_, newErr := os.Stat (newPath)
if oldErr == nil && newErr != nil {
log.Printf("hardlink %s\n", dir)
// If so, just hardlink it to save space and computing time.
if err := linkDirectory(oldPath, newPath); err != nil {
log.Fatal(err)
}
} else {
} else if oldErr != nil && newErr != nil {
log.Printf("unpack %s\n", dir)
files := strings.Split(pkg["Files"], "\n")
filepath := ""
for _, line := range files {
if !strings.HasSuffix(line, ".dsc") {
continue
files := strings.Split(pkg["Files"], "\n")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the indentation different here, especially in (new) line 82 vs 83? This, plus the blank after os.Stat makes me think you are not using gofmt…?

filepath := ""
for _, line := range files {
if !strings.HasSuffix(line, ".dsc") {
continue
}

parts := strings.Split(line, " ")
file := parts[len(parts)-1]
filepath = path.Join(*mirrorPath, pkg["Directory"], file)
}

parts := strings.Split(line, " ")
file := parts[len(parts)-1]
prefix := string(file[0])
if strings.HasPrefix(file, "lib") {
prefix = "lib" + string(file[3])
}
filepath = path.Join(*mirrorPath, "pool", "main", prefix, pkg["Package"], file)
}

if filepath == "" {
log.Fatalf("Package %s contains no dsc file, cannot unpack\n", pkg["Package"])
}
Expand All @@ -114,6 +106,8 @@ func main() {
if err := cmd.Run(); err != nil {
log.Fatal(err)
}
} else {
log.Printf("Skip unpack of %s\n", dir)
}
}
}
33 changes: 33 additions & 0 deletions utils/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package utils

import (
"compress/bzip2"
"github.com/mstap/godebiancontrol"
"log"
"os"
"path/filepath"
)

// MustLoadMirroredControlFile loads the bzip2-compressed control file "name"
// (e.g. "source/Sources.bz2") from every sub-directory of
// <mirrorPath>/dists/<dist> and returns the parsed paragraphs of all of them,
// concatenated. Entries of that directory which are not directories are
// skipped.
//
// As the Must prefix indicates, any error (listing the directory, opening a
// control file or parsing it) terminates the program via log.Fatal/Fatalf.
func MustLoadMirroredControlFile(mirrorPath string, dist string, name string) []godebiancontrol.Paragraph {
	base := filepath.Join(mirrorPath, "dists", dist)
	dir, err := os.Open(base)
	if err != nil {
		log.Fatal(err)
	}
	entries, err := dir.Readdir(-1)
	// The handle is only needed for the listing, so release it right away
	// instead of keeping it open for the lifetime of the process.
	dir.Close()
	if err != nil {
		log.Fatalf("reading directory %q: %v", base, err)
	}

	contents := make([]godebiancontrol.Paragraph, 0)
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		controlPath := filepath.Join(base, entry.Name(), name)
		paragraphs, err := loadControlFile(controlPath)
		if err != nil {
			log.Fatalf("loading control file %q: %v", controlPath, err)
		}
		contents = append(contents, paragraphs...)
	}

	return contents
}

// loadControlFile opens the bzip2-compressed control file at path, parses it
// and closes the file before returning, so that at most one control file is
// open at any time.
func loadControlFile(path string) ([]godebiancontrol.Paragraph, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	return godebiancontrol.Parse(bzip2.NewReader(f))
}