Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(multi-tenancy): add support for multi-tenancy in bulk loader #7399

Merged
merged 7 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions chunker/json_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ func parseScalarFacets(m map[string]interface{}, prefix string) ([]*api.Facet, e

// This is the response for a map[string]interface{} i.e. a struct.
type mapResponse struct {
	uid       string       // uid retrieved or allocated for the node.
	namespace uint64       // namespace to which the node belongs.
	fcts      []*api.Facet // facets on the edge connecting this node to the source if any.
}

func handleBasicType(k string, v interface{}, op int, nq *api.NQuad) error {
Expand Down Expand Up @@ -267,6 +268,7 @@ func (buf *NQuadBuffer) checkForDeletion(mr mapResponse, m map[string]interface{
buf.Push(&api.NQuad{
Subject: mr.uid,
Predicate: x.Star,
Namespace: mr.namespace,
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: x.Star}},
})
}
Expand Down Expand Up @@ -447,12 +449,38 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
mr.uid = getNextBlank()
}

namespace := x.DefaultNamespace
if ns, ok := m["namespace"]; ok {
switch nsVal := ns.(type) {
case json.Number:
nsi, err := nsVal.Int64()
if err != nil {
return mr, err
}
namespace = uint64(nsi)

// this int64 case is needed for FastParseJSON, which doesn't use json.Number
case int64:
namespace = uint64(nsVal)
case string:
s := stripSpaces(nsVal)
if s == "" {
namespace = 0
} else if n, err := strconv.ParseUint(s, 0, 64); err == nil {
namespace = n
} else {
return mr, err
}
}
}
mr.namespace = namespace

for pred, v := range m {
// We have already extracted the uid above so we skip that edge.
// v can be nil if user didn't set a value and if omitEmpty was not supplied as JSON
// option.
// We also skip facets here because we parse them with the corresponding predicate.
if pred == "uid" {
if pred == "uid" || pred == "namespace" {
continue
}

Expand All @@ -462,6 +490,7 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
nq := &api.NQuad{
Subject: mr.uid,
Predicate: pred,
Namespace: namespace,
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: x.Star}},
}
// Here we split predicate and lang directive (ex: "name@en"), if needed. With JSON
Expand All @@ -478,6 +507,7 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
nq := api.NQuad{
Subject: mr.uid,
Predicate: pred,
Namespace: namespace,
}

prefix := pred + x.FacetDelimeter
Expand Down Expand Up @@ -545,6 +575,7 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
nq := api.NQuad{
Subject: mr.uid,
Predicate: pred,
Namespace: namespace,
}

switch iv := item.(type) {
Expand Down Expand Up @@ -739,8 +770,11 @@ func (buf *NQuadBuffer) ParseJSON(b []byte, op int) error {
return nil
}
mr, err := buf.mapToNquads(ms, op, "")
if err != nil {
return err
}
buf.checkForDeletion(mr, ms, op)
return err
return nil
}

// ParseJSON is a convenience wrapper function to get all NQuads in one call. This can however, lead
Expand Down
28 changes: 16 additions & 12 deletions chunker/json_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,15 @@ type address struct {
}

// Person is a test fixture mirroring the JSON documents fed to the parser.
// Namespace is a string so tests can exercise the parser's string form of
// the "namespace" field (e.g. "0x2").
type Person struct {
	Uid       string     `json:"uid,omitempty"`
	Namespace string     `json:"namespace,omitempty"`
	Name      string     `json:"name,omitempty"`
	Age       int        `json:"age,omitempty"`
	Married   *bool      `json:"married,omitempty"`
	Now       *time.Time `json:"now,omitempty"`
	Address   address    `json:"address,omitempty"` // geo value
	Friends   []Person   `json:"friend,omitempty"`
	School    *School    `json:"school,omitempty"`
}

func Parse(b []byte, op int) ([]*api.NQuad, error) {
Expand All @@ -90,6 +91,7 @@ func (exp *Experiment) verify() {
exp.t.Fatalf("Error while getting a dgraph client: %v", err)
}

	// TODO(Naman): Fix these tests once the ACL is integrated.
ctx := context.Background()
require.NoError(exp.t, dg.Alter(ctx, &api.Operation{DropAll: true}), "drop all failed")
require.NoError(exp.t, dg.Alter(ctx, &api.Operation{Schema: exp.schema}),
Expand All @@ -116,10 +118,12 @@ func TestNquadsFromJson1(t *testing.T) {
tn := time.Now().UTC()
m := true
p := Person{
Name: "Alice",
Age: 26,
Married: &m,
Now: &tn,
Uid: "1",
Namespace: "0x2",
Name: "Alice",
Age: 26,
Married: &m,
Now: &tn,
Address: address{
Type: "Point",
Coords: []float64{1.1, 2.0},
Expand Down
14 changes: 9 additions & 5 deletions chunker/rdf_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ var (
ErrEmpty = errors.New("RDF: harmless error, e.g. comment line")
)

// Function to do sanity check for subject, predicate, object and label strings.
// Function to do sanity check for subject, predicate and object strings.
func sane(s string) bool {
// Label and ObjectId can be "", we already check that subject and predicate
// ObjectId can be "", we already check that subject and predicate
// shouldn't be empty.
if len(s) == 0 {
return true
Expand Down Expand Up @@ -194,7 +194,12 @@ L:
break L

case itemLabel:
rnq.Label = strings.TrimFunc(item.Val, isSpaceRune)
s := strings.TrimFunc(item.Val, isSpaceRune)
namespace, err := strconv.ParseUint(s, 0, 64)
if err != nil {
return rnq, errors.Errorf("Invalid namespace ID. Input: [%s]", line)
}
rnq.Namespace = namespace

case itemLeftRound:
it.Prev() // backup '('
Expand All @@ -221,8 +226,7 @@ L:
if len(rnq.ObjectId) == 0 && rnq.ObjectValue == nil {
return rnq, errors.Errorf("No Object in NQuad. Input: [%s]", line)
}
if !sane(rnq.Subject) || !sane(rnq.Predicate) ||
!sane(rnq.ObjectId) || !sane(rnq.Label) {
if !sane(rnq.Subject) || !sane(rnq.Predicate) || !sane(rnq.ObjectId) {
return rnq, errors.Errorf("NQuad failed sanity check:%+v", rnq)
}

Expand Down
22 changes: 11 additions & 11 deletions chunker/rdf_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,40 +346,40 @@ var testNQuads = []struct {
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> <label> .`,
input: `_:alice <knows> "stuff"^^<xs:string> <0xf2> .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: "stuff"}},
Label: "label",
Namespace: 0xf2,
},
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> _:label .`,
input: `_:alice <knows> "stuff"^^<xs:string> <0xf2> .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: "stuff"}},
Label: "_:label",
Namespace: 0xf2,
},
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> _:label . # comment`,
input: `_:alice <knows> "stuff"^^<xs:string> <10> . # comment`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: "stuff"}},
Label: "_:label",
Namespace: 10,
},
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> "label" .`,
input: `_:alice <knows> "stuff"^^<xs:string> "0xf2" .`,
expectedErr: true,
},
{
Expand Down Expand Up @@ -599,13 +599,13 @@ var testNQuads = []struct {

// Edge Facets test.
{
input: `_:alice <knows> "stuff" _:label (key1="val1",key2=13) .`,
input: `_:alice <knows> "stuff" <0x10> (key1="val1",key2=13) .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "stuff"}},
Label: "_:label",
Namespace: 0x10,
Facets: []*api.Facet{
{
Key: "key1",
Expand All @@ -623,13 +623,13 @@ var testNQuads = []struct {
expectedErr: false,
},
{
input: `_:alice <knows> "stuff" _:label (key1=,key2=13) .`,
input: `_:alice <knows> "stuff" <0x12> (key1=,key2=13) .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "stuff"}},
Label: "_:label",
Namespace: 0x12,
Facets: []*api.Facet{
{
Key: "key1",
Expand Down
43 changes: 42 additions & 1 deletion dgraph/cmd/bulk/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ type options struct {
MapShards int
ReduceShards int

Namespace uint64

shardOutputDirs []string

// ........... Badger options ..........
Expand All @@ -100,6 +102,7 @@ type state struct {
dbs []*badger.DB
tmpDbs []*badger.DB // Temporary DB to write the split lists to avoid ordering issues.
writeTs uint64 // All badger writes use this timestamp
namespaces *sync.Map // To store the encountered namespaces.
}

type loader struct {
Expand Down Expand Up @@ -136,6 +139,7 @@ func newLoader(opt *options) *loader {
// Lots of gz readers, so not much channel buffer needed.
readerChunkCh: make(chan *bytes.Buffer, opt.NumGoroutines),
writeTs: getWriteTimestamp(zero),
namespaces: &sync.Map{},
}
st.schema = newSchemaStore(readSchema(opt), opt, st)
ld := &loader{
Expand Down Expand Up @@ -164,6 +168,36 @@ func getWriteTimestamp(zero *grpc.ClientConn) uint64 {
}
}

// leaseNamespaces is called at the end of the map phase. It asks Zero to
// lease namespace IDs up to the maximum namespace ID seen while mapping, so
// that IDs Zero hands out later do not collide with namespaces already
// present in the loaded data.
func (ld *loader) leaseNamespaces() {
	// Find the largest namespace ID encountered during the map phase.
	var maxNs uint64
	ld.namespaces.Range(func(key, value interface{}) bool {
		if ns := key.(uint64); ns > maxNs {
			maxNs = ns
		}
		return true
	})

	// If only the default namespace (0) was seen, there is nothing to lease.
	if maxNs == 0 {
		return
	}

	client := pb.NewZeroClient(ld.zero)
	// Retry forever: the bulk loader cannot proceed safely without the lease.
	for {
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		ns, err := client.AssignIds(ctx, &pb.Num{Val: maxNs, Type: pb.Num_NS_ID})
		cancel()
		if err == nil {
			fmt.Printf("Assigned namespaces till %d\n", ns.GetEndId())
			return
		}
		fmt.Printf("Error communicating with dgraph zero, retrying: %v\n", err)
		time.Sleep(time.Second)
	}
}

func readSchema(opt *options) *schema.ParsedSchema {
f, err := filestore.Open(opt.SchemaFile)
x.Check(err)
Expand All @@ -183,7 +217,7 @@ func readSchema(opt *options) *schema.ParsedSchema {
buf, err := ioutil.ReadAll(r)
x.Check(err)

result, err := schema.Parse(string(buf))
result, err := schema.ParseWithNamespace(string(buf), opt.Namespace)
x.Check(err)
return result
}
Expand Down Expand Up @@ -260,6 +294,9 @@ func (ld *loader) mapStage() {
x.Check(thr.Finish())

// Send the graphql triples
// TODO(Naman): Handle this. Currently we are not attaching the namespace info with the exported
// graphql schema (See exportInternal). Also, attach the namespace information once for the
// namespace we are loading into.
ld.processGqlSchema(loadType)

close(ld.readerChunkCh)
Expand All @@ -276,6 +313,7 @@ func (ld *loader) mapStage() {
ld.xids = nil
}

// TODO(Naman): Fix this for multi-tenancy.
func (ld *loader) processGqlSchema(loadType chunker.InputFormat) {
if ld.opt.GqlSchemaFile == "" {
return
Expand All @@ -299,6 +337,7 @@ func (ld *loader) processGqlSchema(loadType chunker.InputFormat) {
buf, err := ioutil.ReadAll(r)
x.Check(err)

	// TODO(Naman): We will need this for all the namespaces.
rdfSchema := `_:gqlschema <dgraph.type> "dgraph.graphql" .
_:gqlschema <dgraph.graphql.xid> "dgraph.graphql.schema" .
_:gqlschema <dgraph.graphql.schema> %s .
Expand All @@ -310,6 +349,8 @@ func (ld *loader) processGqlSchema(loadType chunker.InputFormat) {
"dgraph.graphql.schema": %s
}`

// TODO(Naman): Process the GQL schema here.

gqlBuf := &bytes.Buffer{}
schema := strconv.Quote(string(buf))
switch loadType {
Expand Down
Loading