Skip to content

Commit

Permalink
insert repo table
Browse files Browse the repository at this point in the history
  • Loading branch information
bbkane committed Jan 20, 2022
1 parent 11deb3c commit abf7320
Show file tree
Hide file tree
Showing 8 changed files with 485 additions and 13 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
dist/
starghaze
tmp*
starghaze-sa-keys.json
starghaze.db
starghaze_download.jsonl
tmp*
144 changes: 143 additions & 1 deletion format.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ package main

import (
"bufio"
"context"
"database/sql"
"embed"
"encoding/csv"
"encoding/json"
"fmt"
Expand All @@ -13,8 +16,12 @@ import (

"github.com/bbkane/warg/flag"
"github.com/lestrrat-go/strftime"
_ "modernc.org/sqlite"
)

// migrationFS holds the *.sql files from the sqlite_migrations directory,
// embedded into the binary at build time. NewSqlitePrinter hands it to
// migrate before any rows are written.
//
//go:embed sqlite_migrations/*.sql
var migrationFS embed.FS

type Printer interface {
Header() error
Line(*starredRepositoryEdge) error
Expand Down Expand Up @@ -147,6 +154,130 @@ func (p *CSVPrinter) Flush() error {
return p.writer.Error()
}

// -- SqlitePrinter

// SqlitePrinter is a Printer that stores each starred repository as a row in
// a SQLite database instead of writing text to an output stream.
type SqlitePrinter struct {
	// db is the open database handle; Flush closes it.
	db *sql.DB
	// tx is the single transaction shared by every write. It is opened once
	// by NewSqlitePrinter and cached here so Line and Flush can reuse it.
	tx *sql.Tx
	// ctx is the context passed to queries executed on tx.
	ctx context.Context
	// err holds the most recent failure recorded by Line. Flush inspects it
	// to decide what to do with tx.
	err error
}

// NewSqlitePrinter opens the SQLite database identified by dsn, turns on
// foreign key enforcement, applies the embedded migrations, and begins the
// single transaction that every later Line call writes into.
//
// If any step after opening the database fails, the handle is closed before
// the error is returned, so the caller has nothing to clean up on failure.
func NewSqlitePrinter(dsn string) (*SqlitePrinter, error) {
	db, err := sql.Open("sqlite", dsn)
	if err != nil {
		return nil, fmt.Errorf("db open error: %s: %w", dsn, err)
	}

	// fail releases the database handle and reports the setup error.
	fail := func(err error) (*SqlitePrinter, error) {
		// Best-effort close: the setup error is the one worth reporting.
		_ = db.Close()
		return nil, err
	}

	// Enable foreign key checks. For historical reasons, SQLite does not
	// check foreign key constraints by default. There is some overhead on
	// inserts to verify foreign key integrity, but it is worth it.
	//
	// NOTE(review): this pragma is a per-connection setting and database/sql
	// pools connections, so it is only guaranteed for the connection that ran
	// this statement - confirm the transaction below ends up on the same one.
	if _, err := db.Exec(`PRAGMA foreign_keys = ON;`); err != nil {
		return fail(fmt.Errorf("foreign keys pragma: %w", err))
	}

	if err := migrate(db, migrationFS); err != nil {
		return fail(fmt.Errorf("migrate: %w", err))
	}

	tx, err := db.Begin()
	if err != nil {
		return fail(fmt.Errorf("can't begin tx: %w", err))
	}

	return &SqlitePrinter{
		ctx: context.Background(), // TODO: parameterize
		db:  db,
		err: nil,
		tx:  tx,
	}, nil
}

// Header is a no-op for SqlitePrinter: nothing is written ahead of the rows,
// so it always returns nil.
//
// It uses a pointer receiver so that it matches the type's other methods
// (Line and Flush).
func (p *SqlitePrinter) Header() error {
	return nil
}

// Line inserts one starred repository into the Repo table using the shared
// transaction.
//
// Every failure is both returned and recorded in p.err. Flush consults p.err
// so that a transaction containing a failed write is not committed.
func (p *SqlitePrinter) Line(sr *starredRepositoryEdge) error {
	// fail records err on the printer before handing it back to the caller.
	fail := func(err error) error {
		p.err = err
		return err
	}

	starredAt, err := sr.StarredAt.Time()
	if err != nil {
		return fail(fmt.Errorf("StarredAt time err: %w", err))
	}

	pushedAt, err := sr.Node.PushedAt.Time()
	if err != nil {
		return fail(fmt.Errorf("PushedAt time err: %w", err))
	}

	updatedAt, err := sr.Node.UpdatedAt.Time()
	if err != nil {
		return fail(fmt.Errorf("UpdatedAt time err: %w", err))
	}

	// repoID receives the id of the new row through RETURNING. Nothing in
	// this method reads it afterwards.
	var repoID int
	err = p.tx.QueryRowContext(
		p.ctx,
		`
INSERT INTO Repo (
StarredAt,
Description,
HomepageURL,
NameWithOwner,
Readme,
PushedAt,
StargazerCount,
UpdatedAt,
Url
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING id
`,
		(*NullTime)(&starredAt),
		sr.Node.Description,
		sr.Node.HomepageURL,
		sr.Node.NameWithOwner,
		sr.Node.Object.Blob.Text,
		(*NullTime)(&pushedAt),
		sr.Node.StargazerCount,
		(*NullTime)(&updatedAt),
		sr.Node.Url,
	).Scan(&repoID)
	if err != nil {
		// Wrapped like the time-parse errors above so the caller can tell
		// which step failed; the driver error stays reachable via %w.
		return fail(fmt.Errorf("Repo insert err: %w", err))
	}

	return nil
}

// Flush finishes the shared transaction and closes the database.
//
// If Line recorded a failure in p.err, the transaction is rolled back and an
// error wrapping p.err is returned; Commit is never attempted on a
// rolled-back transaction. Otherwise the transaction is committed. The
// database handle is closed on every path.
func (p *SqlitePrinter) Flush() error {
	if p.err != nil {
		rollbackErr := p.tx.Rollback()
		// Best-effort close: the write failure is the error worth reporting.
		_ = p.db.Close()
		if rollbackErr != nil {
			return fmt.Errorf("rollback err: %v (after write err: %w)", rollbackErr, p.err)
		}
		return fmt.Errorf("rolled back after write err: %w", p.err)
	}

	if err := p.tx.Commit(); err != nil {
		// Best-effort close: the commit failure is the error worth reporting.
		_ = p.db.Close()
		return fmt.Errorf("commit err: %w", err)
	}

	return p.db.Close()
}

// Compile-time check that *SqlitePrinter satisfies Printer.
var _ Printer = (*SqlitePrinter)(nil)

// -- ZincPrinter

type ZincPrinter struct {
Expand Down Expand Up @@ -252,13 +383,18 @@ func (d *formattedDate) UnmarshalJSON(b []byte) error {
return json.Unmarshal(b, &d.datetime)
}

// Time parses the stored datetime string as RFC 3339 and returns the
// resulting time.Time, or the parse error if the string is not valid.
func (d formattedDate) Time() (time.Time, error) {
	return time.Parse(time.RFC3339, d.datetime)
}

// FormatString formats d with the given format.
// If d.Format is nil, it just returns the stored datetime string unchanged.
func (d *formattedDate) FormatString() (string, error) {
if d.Format == nil {
return d.datetime, nil
}
t, err := time.Parse(time.RFC3339, d.datetime)
t, err := d.Time()
if err != nil {
return "", err
}
Expand All @@ -269,6 +405,7 @@ func format(pf flag.PassedFlags) error {
format := pf["--format"].(string)
includeReadmes := pf["--include-readmes"].(bool)
maxLineSize := pf["--max-line-size"].(int)
sqliteDSN := pf["--sqlite-dsn"].(string)
zincIndexName := pf["--zinc-index-name"].(string)

dateFormatStr, dateFormatStrExists := pf["--date-format"].(string)
Expand Down Expand Up @@ -301,6 +438,11 @@ func format(pf flag.PassedFlags) error {
p = NewCSVPrinter(outputBuf)
case "jsonl":
p = NewJSONPrinter(outputBuf)
case "sqlite":
p, err = NewSqlitePrinter(sqliteDSN)
if err != nil {
return fmt.Errorf("sql open err: %w", err)
}
case "zinc":
p = NewZincPrinter(outputBuf, zincIndexName)
default:
Expand Down
16 changes: 7 additions & 9 deletions github.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,14 @@ func githubStarsDownload(pf flag.PassedFlags) error {
afterPtr = &afterStr
}

output, outputExists := pf["--output"].(string)
fp := os.Stdout
if outputExists {
newFP, err := os.Create(output)
if err != nil {
return fmt.Errorf("file open err: %w", err)
}
fp = newFP
defer newFP.Close()
outputPath := pf["--output"].(string)
// https://pkg.go.dev/os?utm_source=gopls#pkg-constants
// return error if the file exists - NOTE: this kind of screws with any plans to append
fp, err := os.OpenFile(outputPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
return fmt.Errorf("file open err: %w", err)
}
defer fp.Close()

buf := bufio.NewWriter(fp)
defer buf.Flush()
Expand Down
16 changes: 16 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,34 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/googleapis/gax-go/v2 v2.1.1 // indirect
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
github.com/lestrrat-go/strftime v1.0.5 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/pkg/errors v0.8.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect
github.com/shurcooL/graphql v0.0.0-20200928012149-18c5c3165e3a // indirect
go.opencensus.io v0.23.0 // indirect
golang.org/x/mod v0.4.2 // indirect
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420 // indirect
golang.org/x/sys v0.0.0-20211210111614-af8b64212486 // indirect
golang.org/x/text v0.3.6 // indirect
golang.org/x/tools v0.1.5 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20211206160659-862468c7d6e0 // indirect
google.golang.org/grpc v1.40.1 // indirect
google.golang.org/protobuf v1.27.1 // indirect
lukechampine.com/uint128 v1.1.1 // indirect
modernc.org/cc/v3 v3.35.22 // indirect
modernc.org/ccgo/v3 v3.15.1 // indirect
modernc.org/libc v1.14.1 // indirect
modernc.org/mathutil v1.4.1 // indirect
modernc.org/memory v1.0.5 // indirect
modernc.org/opt v0.1.1 // indirect
modernc.org/sqlite v1.14.5 // indirect
modernc.org/strutil v1.1.1 // indirect
modernc.org/token v1.0.0 // indirect
)
Loading

0 comments on commit abf7320

Please sign in to comment.