From b1582ce1c96f5424c91cc74312663de49cbb1798 Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Wed, 2 Mar 2016 11:50:05 -0700 Subject: [PATCH 01/14] add RetentionPolicyInfo marshaling --- services/meta/data.go | 19 +++++++++++++++++-- services/meta/store_fsm.go | 4 ++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/services/meta/data.go b/services/meta/data.go index 92458316bd1..45a6b6e4605 100644 --- a/services/meta/data.go +++ b/services/meta/data.go @@ -85,9 +85,9 @@ func (data *Data) CreateDataNode(host, tcpHost string) error { return nil } -// setDataNode adds a data node with a pre-specified nodeID. +// SetDataNode adds a data node with a pre-specified nodeID. // this should only be used when the cluster is upgrading from 0.9 to 0.10 -func (data *Data) setDataNode(nodeID uint64, host, tcpHost string) error { +func (data *Data) SetDataNode(nodeID uint64, host, tcpHost string) error { // Ensure a node with the same host doesn't already exist. for _, n := range data.DataNodes { if n.Host == host { @@ -1199,6 +1199,21 @@ func (rpi RetentionPolicyInfo) clone() RetentionPolicyInfo { return other } +// MarshalBinary encodes rpi to a binary format. +func (rpi *RetentionPolicyInfo) MarshalBinary() ([]byte, error) { + return proto.Marshal(rpi.marshal()) +} + +// UnmarshalBinary decodes rpi from a binary format. +func (rpi *RetentionPolicyInfo) UnmarshalBinary(data []byte) error { + var pb internal.RetentionPolicyInfo + if err := proto.Unmarshal(data, &pb); err != nil { + return err + } + rpi.unmarshal(&pb) + return nil +} + // shardGroupDuration returns the duration for a shard group based on a policy duration. func shardGroupDuration(d time.Duration) time.Duration { if d >= 180*24*time.Hour || d == 0 { // 6 months or 0 diff --git a/services/meta/store_fsm.go b/services/meta/store_fsm.go index e42f4dc5d62..02deda466b7 100644 --- a/services/meta/store_fsm.go +++ b/services/meta/store_fsm.go @@ -150,7 +150,7 @@ func (fsm *storeFSM) applyCreateNodeCommand(cmd *internal.Command, peers []strin if len(other.MetaNodes) == 1 { metaNode := other.MetaNodes[0] - if err := other.setDataNode(metaNode.ID, v.GetHost(), v.GetHost()); err != nil { + if err := other.SetDataNode(metaNode.ID, v.GetHost(), v.GetHost()); err != nil { return err } } else { @@ -566,7 +566,7 @@ func (fsm *storeFSM) applyCreateDataNodeCommand(cmd *internal.Command) interface if len(other.MetaNodes) == 1 && len(other.DataNodes) == 0 { metaNode := other.MetaNodes[0] - if err := other.setDataNode(metaNode.ID, v.GetHTTPAddr(), v.GetTCPAddr()); err != nil { + if err := other.SetDataNode(metaNode.ID, v.GetHTTPAddr(), v.GetTCPAddr()); err != nil { return err } } else { From c972c717b6d6c0fce5abf85a9216fdbc13e3fbd2 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Wed, 2 Mar 2016 13:16:53 -0600 Subject: [PATCH 02/14] remove `client.NodeID` --- services/continuous_querier/service.go | 1 - services/meta/client.go | 3 --- services/meta/service_test.go | 1 - 3 files changed, 5 deletions(-) diff --git a/services/continuous_querier/service.go b/services/continuous_querier/service.go index 9ccf1faa8f2..4c369be3fa5 100644 --- a/services/continuous_querier/service.go +++ b/services/continuous_querier/service.go @@ -40,7 +40,6 @@ type metaClient interface { AcquireLease(name string) (l *meta.Lease, err error) Databases() ([]meta.DatabaseInfo, error) Database(name string) (*meta.DatabaseInfo, error) - NodeID() uint64 } // RunRequest is a request to run one or more CQs. 
diff --git a/services/meta/client.go b/services/meta/client.go index 32ab7900df4..c714bc67e50 100644 --- a/services/meta/client.go +++ b/services/meta/client.go @@ -109,9 +109,6 @@ func (c *Client) Close() error { return nil } -// GetNodeID returns the client's node ID. -func (c *Client) NodeID() uint64 { return c.nodeID } - // SetMetaServers updates the meta servers on the client. func (c *Client) SetMetaServers(a []string) { c.mu.Lock() diff --git a/services/meta/service_test.go b/services/meta/service_test.go index 41117ca57b3..8d89646a15c 100644 --- a/services/meta/service_test.go +++ b/services/meta/service_test.go @@ -1327,7 +1327,6 @@ func TestMetaService_AcquireLease(t *testing.T) { t.Fatalf("owner ID wrong. exp %d got %d", n1.ID, l.Owner) } - t.Logf("c1: %d, c2: %d", c1.NodeID(), c2.NodeID()) // Client 2 attempts to acquire the same lease. Should fail. l, err = c2.AcquireLease("foo") if err == nil { From 31a964e890558e0f8507257a592240853410d371 Mon Sep 17 00:00:00 2001 From: Jason Wilder Date: Wed, 2 Mar 2016 12:38:50 -0700 Subject: [PATCH 03/14] Move leases to meta.Data --- services/httpd/handler.go | 7 ++++-- services/meta/data.go | 46 +++++++++++++++++++++++++++++++++++++++ services/meta/handler.go | 44 ------------------------------------- 3 files changed, 51 insertions(+), 46 deletions(-) diff --git a/services/httpd/handler.go b/services/httpd/handler.go index 9d6775812d4..a9123af0b73 100644 --- a/services/httpd/handler.go +++ b/services/httpd/handler.go @@ -64,8 +64,11 @@ type Handler struct { Ping(checkAllMetaServers bool) error } - QueryAuthorizer *meta.QueryAuthorizer - QueryExecutor influxql.QueryExecutor + QueryAuthorizer interface { + AuthorizeQuery(u *meta.UserInfo, query *influxql.Query, database string) error + } + + QueryExecutor influxql.QueryExecutor PointsWriter interface { WritePoints(p *cluster.WritePointsRequest) error diff --git a/services/meta/data.go b/services/meta/data.go index 45a6b6e4605..6a6c57e7b28 100644 --- a/services/meta/data.go +++ b/services/meta/data.go @@ -1,8 +1,10 @@ package meta import ( + "errors" "fmt" "sort" + "sync" "time" "github.com/gogo/protobuf/proto" @@ -1524,6 +1526,50 @@ func (ui *UserInfo) unmarshal(pb *internal.UserInfo) { } } +type Lease struct { + Name string `json:"name"` + Expiration time.Time `json:"expiration"` + Owner uint64 `json:"owner"` +} + +type Leases struct { + mu sync.Mutex + m map[string]*Lease + d time.Duration +} + +func NewLeases(d time.Duration) *Leases { + return &Leases{ + m: make(map[string]*Lease), + d: d, + } +} + +func (leases *Leases) Acquire(name string, nodeID uint64) (*Lease, error) { + leases.mu.Lock() + defer leases.mu.Unlock() + + l, ok := leases.m[name] + if ok { + if time.Now().After(l.Expiration) || l.Owner == nodeID { + l.Expiration = time.Now().Add(leases.d) + l.Owner = nodeID + return l, nil + } + return l, errors.New("another node has the lease") + } + + l = &Lease{ + Name: name, + Expiration: time.Now().Add(leases.d), + Owner: nodeID, + } + + leases.m[name] = l + + return l, nil +} + // MarshalTime converts t to nanoseconds since epoch. A zero time returns 0. 
func MarshalTime(t time.Time) int64 { if t.IsZero() { diff --git a/services/meta/handler.go b/services/meta/handler.go index 9698ef9b4fe..82d10f47c61 100644 --- a/services/meta/handler.go +++ b/services/meta/handler.go @@ -479,47 +479,3 @@ func (h *handler) httpError(err error, w http.ResponseWriter, status int) { } http.Error(w, "", status) } - -type Lease struct { - Name string `json:"name"` - Expiration time.Time `json:"expiration"` - Owner uint64 `json:"owner"` -} - -type Leases struct { - mu sync.Mutex - m map[string]*Lease - d time.Duration -} - -func NewLeases(d time.Duration) *Leases { - return &Leases{ - m: make(map[string]*Lease), - d: d, - } -} - -func (leases *Leases) Acquire(name string, nodeID uint64) (*Lease, error) { - leases.mu.Lock() - defer leases.mu.Unlock() - - l, ok := leases.m[name] - if ok { - if time.Now().After(l.Expiration) || l.Owner == nodeID { - l.Expiration = time.Now().Add(leases.d) - l.Owner = nodeID - return l, nil - } - return l, errors.New("another node has the lease") - } - - l = &Lease{ - Name: name, - Expiration: time.Now().Add(leases.d), - Owner: nodeID, - } - - leases.m[name] = l - - return l, nil -} From beda072426126fdfd7d1e35ef6dcae3cfb74a1ec Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Fri, 4 Mar 2016 11:01:41 -0700 Subject: [PATCH 04/14] add support for remote expansion of regex This commit moves the `tsdb.Store.ExpandSources()` function onto the `influxql.IteratorCreator` and provides support for issuing source expansion across a cluster. --- cluster/internal/data.pb.go | 106 +++++++++++++++++++++---------- cluster/internal/data.proto | 9 +++ cluster/query_executor.go | 40 ++++++++++-- cluster/query_executor_test.go | 5 ++ cluster/rpc.go | 69 ++++++++++++++++++++ cluster/service.go | 49 ++++++++++++++ cluster/shard_writer.go | 3 + influxql/ast.go | 13 ++++ influxql/internal/internal.pb.go | 96 +++++++++++++--------------- influxql/iterator.gen.go | 16 +++++ influxql/iterator.gen.go.tmpl | 6 +- influxql/iterator.go | 39 ++++++++++++ influxql/iterator_test.go | 5 ++ tsdb/shard.go | 50 +++++++++++++++ tsdb/store.go | 80 ++++++----------------- 15 files changed, 430 insertions(+), 156 deletions(-) diff --git a/cluster/internal/data.pb.go b/cluster/internal/data.pb.go index 667cea9fb15..54c44951cd3 100644 --- a/cluster/internal/data.pb.go +++ b/cluster/internal/data.pb.go @@ -19,23 +19,23 @@ It has these top-level messages: FieldDimensionsResponse SeriesKeysRequest SeriesKeysResponse + ExpandSourcesRequest + ExpandSourcesResponse */ package internal import proto "github.com/gogo/protobuf/proto" -import fmt "fmt" import math "math" // Reference imports to suppress errors if they are not otherwise used. 
var _ = proto.Marshal -var _ = fmt.Errorf var _ = math.Inf type WriteShardRequest struct { - ShardID *uint64 `protobuf:"varint,1,req,name=ShardID" json:"ShardID,omitempty"` - Points [][]byte `protobuf:"bytes,2,rep,name=Points" json:"Points,omitempty"` - Database *string `protobuf:"bytes,3,opt,name=Database" json:"Database,omitempty"` - RetentionPolicy *string `protobuf:"bytes,4,opt,name=RetentionPolicy" json:"RetentionPolicy,omitempty"` + ShardID *uint64 `protobuf:"varint,1,req" json:"ShardID,omitempty"` + Points [][]byte `protobuf:"bytes,2,rep" json:"Points,omitempty"` + Database *string `protobuf:"bytes,3,opt" json:"Database,omitempty"` + RetentionPolicy *string `protobuf:"bytes,4,opt" json:"RetentionPolicy,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -72,8 +72,8 @@ func (m *WriteShardRequest) GetRetentionPolicy() string { } type WriteShardResponse struct { - Code *int32 `protobuf:"varint,1,req,name=Code" json:"Code,omitempty"` - Message *string `protobuf:"bytes,2,opt,name=Message" json:"Message,omitempty"` + Code *int32 `protobuf:"varint,1,req" json:"Code,omitempty"` + Message *string `protobuf:"bytes,2,opt" json:"Message,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -96,8 +96,8 @@ func (m *WriteShardResponse) GetMessage() string { } type ExecuteStatementRequest struct { - Statement *string `protobuf:"bytes,1,req,name=Statement" json:"Statement,omitempty"` - Database *string `protobuf:"bytes,2,req,name=Database" json:"Database,omitempty"` + Statement *string `protobuf:"bytes,1,req" json:"Statement,omitempty"` + Database *string `protobuf:"bytes,2,req" json:"Database,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -120,8 +120,8 @@ func (m *ExecuteStatementRequest) GetDatabase() string { } type ExecuteStatementResponse struct { - Code *int32 `protobuf:"varint,1,req,name=Code" json:"Code,omitempty"` - Message *string `protobuf:"bytes,2,opt,name=Message" json:"Message,omitempty"` + Code *int32 `protobuf:"varint,1,req" json:"Code,omitempty"` + Message *string `protobuf:"bytes,2,opt" json:"Message,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -144,8 +144,8 @@ func (m *ExecuteStatementResponse) GetMessage() string { } type CreateIteratorRequest struct { - ShardIDs []uint64 `protobuf:"varint,1,rep,name=ShardIDs" json:"ShardIDs,omitempty"` - Opt []byte `protobuf:"bytes,2,req,name=Opt" json:"Opt,omitempty"` + ShardIDs []uint64 `protobuf:"varint,1,rep" json:"ShardIDs,omitempty"` + Opt []byte `protobuf:"bytes,2,req" json:"Opt,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -168,7 +168,7 @@ func (m *CreateIteratorRequest) GetOpt() []byte { } type CreateIteratorResponse struct { - Err *string `protobuf:"bytes,1,opt,name=Err" json:"Err,omitempty"` + Err *string `protobuf:"bytes,1,opt" json:"Err,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -184,8 +184,8 @@ func (m *CreateIteratorResponse) GetErr() string { } type FieldDimensionsRequest struct { - ShardIDs []uint64 `protobuf:"varint,1,rep,name=ShardIDs" json:"ShardIDs,omitempty"` - Sources []byte `protobuf:"bytes,2,req,name=Sources" json:"Sources,omitempty"` + ShardIDs []uint64 `protobuf:"varint,1,rep" json:"ShardIDs,omitempty"` + Sources []byte `protobuf:"bytes,2,req" json:"Sources,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -208,9 +208,9 @@ func (m *FieldDimensionsRequest) GetSources() []byte { } type FieldDimensionsResponse struct { - Fields []string `protobuf:"bytes,1,rep,name=Fields" json:"Fields,omitempty"` - Dimensions []string `protobuf:"bytes,2,rep,name=Dimensions" json:"Dimensions,omitempty"` - Err 
*string `protobuf:"bytes,3,opt,name=Err" json:"Err,omitempty"` + Fields []string `protobuf:"bytes,1,rep" json:"Fields,omitempty"` + Dimensions []string `protobuf:"bytes,2,rep" json:"Dimensions,omitempty"` + Err *string `protobuf:"bytes,3,opt" json:"Err,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -240,8 +240,8 @@ func (m *FieldDimensionsResponse) GetErr() string { } type SeriesKeysRequest struct { - ShardIDs []uint64 `protobuf:"varint,1,rep,name=ShardIDs" json:"ShardIDs,omitempty"` - Opt []byte `protobuf:"bytes,2,req,name=Opt" json:"Opt,omitempty"` + ShardIDs []uint64 `protobuf:"varint,1,rep" json:"ShardIDs,omitempty"` + Opt []byte `protobuf:"bytes,2,req" json:"Opt,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -264,8 +264,8 @@ func (m *SeriesKeysRequest) GetOpt() []byte { } type SeriesKeysResponse struct { - SeriesList []byte `protobuf:"bytes,1,opt,name=SeriesList" json:"SeriesList,omitempty"` - Err *string `protobuf:"bytes,2,opt,name=Err" json:"Err,omitempty"` + SeriesList []byte `protobuf:"bytes,1,opt" json:"SeriesList,omitempty"` + Err *string `protobuf:"bytes,2,opt" json:"Err,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -287,15 +287,53 @@ func (m *SeriesKeysResponse) GetErr() string { return "" } +type ExpandSourcesRequest struct { + ShardIDs []uint64 `protobuf:"varint,1,rep" json:"ShardIDs,omitempty"` + Sources []byte `protobuf:"bytes,2,req" json:"Sources,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ExpandSourcesRequest) Reset() { *m = ExpandSourcesRequest{} } +func (m *ExpandSourcesRequest) String() string { return proto.CompactTextString(m) } +func (*ExpandSourcesRequest) ProtoMessage() {} + +func (m *ExpandSourcesRequest) GetShardIDs() []uint64 { + if m != nil { + return m.ShardIDs + } + return nil +} + +func (m *ExpandSourcesRequest) GetSources() []byte { + if m != nil { + return m.Sources + } + return nil +} + +type ExpandSourcesResponse struct { + Sources []byte `protobuf:"bytes,1,req" json:"Sources,omitempty"` + Err *string `protobuf:"bytes,2,opt" json:"Err,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ExpandSourcesResponse) Reset() { *m = ExpandSourcesResponse{} } +func (m *ExpandSourcesResponse) String() string { return proto.CompactTextString(m) } +func (*ExpandSourcesResponse) ProtoMessage() {} + +func (m *ExpandSourcesResponse) GetSources() []byte { + if m != nil { + return m.Sources + } + return nil +} + +func (m *ExpandSourcesResponse) GetErr() string { + if m != nil && m.Err != nil { + return *m.Err + } + return "" +} + func init() { - proto.RegisterType((*WriteShardRequest)(nil), "internal.WriteShardRequest") - proto.RegisterType((*WriteShardResponse)(nil), "internal.WriteShardResponse") - proto.RegisterType((*ExecuteStatementRequest)(nil), "internal.ExecuteStatementRequest") - proto.RegisterType((*ExecuteStatementResponse)(nil), "internal.ExecuteStatementResponse") - proto.RegisterType((*CreateIteratorRequest)(nil), "internal.CreateIteratorRequest") - proto.RegisterType((*CreateIteratorResponse)(nil), "internal.CreateIteratorResponse") - proto.RegisterType((*FieldDimensionsRequest)(nil), "internal.FieldDimensionsRequest") - proto.RegisterType((*FieldDimensionsResponse)(nil), "internal.FieldDimensionsResponse") - proto.RegisterType((*SeriesKeysRequest)(nil), "internal.SeriesKeysRequest") - proto.RegisterType((*SeriesKeysResponse)(nil), "internal.SeriesKeysResponse") } diff --git a/cluster/internal/data.proto b/cluster/internal/data.proto index 97e686e306f..a1579035e0a 100644 --- a/cluster/internal/data.proto +++ 
b/cluster/internal/data.proto @@ -52,3 +52,12 @@ message SeriesKeysResponse { optional string Err = 2; } +message ExpandSourcesRequest { + repeated uint64 ShardIDs = 1; + required bytes Sources = 2; +} + +message ExpandSourcesResponse { + required bytes Sources = 1; + optional string Err = 2; +} diff --git a/cluster/query_executor.go b/cluster/query_executor.go index 937b41594cb..e8e83928a18 100644 --- a/cluster/query_executor.go +++ b/cluster/query_executor.go @@ -415,13 +415,6 @@ func (e *QueryExecutor) executeSelectStatement(stmt *influxql.SelectStatement, c opt.MinTime = time.Unix(0, 0) } - // Expand regex sources to their actual source names. - sources, err := e.TSDBStore.ExpandSources(stmt.Sources) - if err != nil { - return err - } - stmt.Sources = sources - // Convert DISTINCT into a call. stmt.RewriteDistinct() @@ -434,6 +427,15 @@ func (e *QueryExecutor) executeSelectStatement(stmt *influxql.SelectStatement, c return err } + // Expand regex sources to their actual source names. + if stmt.Sources.HasRegex() { + sources, err := ic.ExpandSources(stmt.Sources) + if err != nil { + return err + } + stmt.Sources = sources + } + // Rewrite wildcards, if any exist. tmp, err := stmt.RewriteWildcards(ic) if err != nil { @@ -1056,6 +1058,30 @@ func (ic *remoteIteratorCreator) SeriesKeys(opt influxql.IteratorOptions) (influ return resp.SeriesList, resp.Err } +// ExpandSources expands regex sources on a remote iterator creator. +func (ic *remoteIteratorCreator) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { + conn, err := ic.dialer.DialNode(ic.nodeID) + if err != nil { + return nil, err + } + defer conn.Close() + + // Write request. + if err := EncodeTLV(conn, expandSourcesRequestMessage, &ExpandSourcesRequest{ + ShardIDs: ic.shardIDs, + Sources: sources, + }); err != nil { + return nil, err + } + + // Read the response. + var resp ExpandSourcesResponse + if _, err := DecodeTLV(conn, &resp); err != nil { + return nil, err + } + return resp.Sources, resp.Err +} + // NodeDialer dials connections to a given node. type NodeDialer struct { MetaClient MetaClient diff --git a/cluster/query_executor_test.go b/cluster/query_executor_test.go index f14a810c180..6a171cad109 100644 --- a/cluster/query_executor_test.go +++ b/cluster/query_executor_test.go @@ -275,6 +275,7 @@ type IteratorCreator struct { CreateIteratorFn func(opt influxql.IteratorOptions) (influxql.Iterator, error) FieldDimensionsFn func(sources influxql.Sources) (fields, dimensions map[string]struct{}, err error) SeriesKeysFn func(opt influxql.IteratorOptions) (influxql.SeriesList, error) + ExpandSourcesFn func(sources influxql.Sources) (influxql.Sources, error) } func (ic *IteratorCreator) CreateIterator(opt influxql.IteratorOptions) (influxql.Iterator, error) { @@ -289,6 +290,10 @@ func (ic *IteratorCreator) SeriesKeys(opt influxql.IteratorOptions) (influxql.Se return ic.SeriesKeysFn(opt) } +func (ic *IteratorCreator) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { + return ic.ExpandSourcesFn(sources) +} + // FloatIterator is a represents an iterator that reads from a slice. type FloatIterator struct { Points []influxql.FloatPoint diff --git a/cluster/rpc.go b/cluster/rpc.go index 8710e899801..814615af3cc 100644 --- a/cluster/rpc.go +++ b/cluster/rpc.go @@ -411,3 +411,72 @@ func (r *SeriesKeysResponse) UnmarshalBinary(data []byte) error { return nil } + +// ExpandSourcesRequest represents a request to expand regex sources. 
+type ExpandSourcesRequest struct { + ShardIDs []uint64 + Sources influxql.Sources +} + +// MarshalBinary encodes r to a binary format. +func (r *ExpandSourcesRequest) MarshalBinary() ([]byte, error) { + buf, err := r.Sources.MarshalBinary() + if err != nil { + return nil, err + } + return proto.Marshal(&internal.ExpandSourcesRequest{ + ShardIDs: r.ShardIDs, + Sources: buf, + }) +} + +// UnmarshalBinary decodes data into r. +func (r *ExpandSourcesRequest) UnmarshalBinary(data []byte) error { + var pb internal.ExpandSourcesRequest + if err := proto.Unmarshal(data, &pb); err != nil { + return err + } + + r.ShardIDs = pb.GetShardIDs() + if err := r.Sources.UnmarshalBinary(pb.GetSources()); err != nil { + return err + } + return nil +} + +// ExpandSourcesResponse represents a response from source expansion. +type ExpandSourcesResponse struct { + Sources influxql.Sources + Err error +} + +// MarshalBinary encodes r to a binary format. +func (r *ExpandSourcesResponse) MarshalBinary() ([]byte, error) { + var pb internal.ExpandSourcesResponse + buf, err := r.Sources.MarshalBinary() + if err != nil { + return nil, err + } + pb.Sources = buf + + if r.Err != nil { + pb.Err = proto.String(r.Err.Error()) + } + return proto.Marshal(&pb) +} + +// UnmarshalBinary decodes data into r. +func (r *ExpandSourcesResponse) UnmarshalBinary(data []byte) error { + var pb internal.ExpandSourcesResponse + if err := proto.Unmarshal(data, &pb); err != nil { + return err + } + if err := r.Sources.UnmarshalBinary(pb.GetSources()); err != nil { + return err + } + + if pb.Err != nil { + r.Err = errors.New(pb.GetErr()) + } + return nil +} diff --git a/cluster/service.go b/cluster/service.go index a24d569a1f0..cedc5a76f1a 100644 --- a/cluster/service.go +++ b/cluster/service.go @@ -38,6 +38,9 @@ const ( seriesKeysReq = "seriesKeysReq" seriesKeysResp = "seriesKeysResp" + + expandSourcesReq = "expandSourcesReq" + expandSourcesResp = "expandSourcesResp" ) // Service processes data received over raw TCP connections. @@ -196,6 +199,10 @@ func (s *Service) handleConn(conn net.Conn) { s.statMap.Add(seriesKeysReq, 1) s.processSeriesKeysRequest(conn) return + case expandSourcesRequestMessage: + s.statMap.Add(expandSourcesReq, 1) + s.processExpandSourcesRequest(conn) + return default: s.Logger.Printf("cluster service message type not found: %d", typ) } @@ -439,6 +446,48 @@ func (s *Service) processSeriesKeysRequest(conn net.Conn) { } } +func (s *Service) processExpandSourcesRequest(conn net.Conn) { + var sources influxql.Sources + if err := func() error { + // Parse request. + var req ExpandSourcesRequest + if err := DecodeLV(conn, &req); err != nil { + return err + } + + // Collect iterator creators for each shard. + ics := make([]influxql.IteratorCreator, 0, len(req.ShardIDs)) + for _, shardID := range req.ShardIDs { + ic := s.TSDBStore.ShardIteratorCreator(shardID) + if ic == nil { + return nil + } + ics = append(ics, ic) + } + + // Expand sources from all shards. + a, err := influxql.IteratorCreators(ics).ExpandSources(req.Sources) + if err != nil { + return err + } + sources = a + + return nil + }(); err != nil { + s.Logger.Printf("error reading ExpandSources request: %s", err) + EncodeTLV(conn, expandSourcesResponseMessage, &ExpandSourcesResponse{Err: err}) + return + } + + // Encode success response. 
+ if err := EncodeTLV(conn, expandSourcesResponseMessage, &ExpandSourcesResponse{ + Sources: sources, + }); err != nil { + s.Logger.Printf("error writing ExpandSources response: %s", err) + return + } +} + // ReadTLV reads a type-length-value record from r. func ReadTLV(r io.Reader) (byte, []byte, error) { typ, err := ReadType(r) diff --git a/cluster/shard_writer.go b/cluster/shard_writer.go index 9a44137a8ee..4a83f8b97d1 100644 --- a/cluster/shard_writer.go +++ b/cluster/shard_writer.go @@ -24,6 +24,9 @@ const ( seriesKeysRequestMessage seriesKeysResponseMessage + + expandSourcesRequestMessage + expandSourcesResponseMessage ) // ShardWriter writes a set of points to a shard. diff --git a/influxql/ast.go b/influxql/ast.go index cbfb1a2c045..59fa62da6e8 100644 --- a/influxql/ast.go +++ b/influxql/ast.go @@ -309,6 +309,19 @@ func (a Sources) HasSystemSource() bool { return false } +// HasRegex returns true if any of the sources are regex measurements. +func (a Sources) HasRegex() bool { + for _, s := range a { + switch s := s.(type) { + case *Measurement: + if s.Regex != nil { + return true + } + } + } + return false +} + // String returns a string representation of a Sources array. func (a Sources) String() string { var buf bytes.Buffer diff --git a/influxql/internal/internal.pb.go b/influxql/internal/internal.pb.go index 03c7c5f5806..74c19fef470 100644 --- a/influxql/internal/internal.pb.go +++ b/influxql/internal/internal.pb.go @@ -21,25 +21,23 @@ It has these top-level messages: package internal import proto "github.com/gogo/protobuf/proto" -import fmt "fmt" import math "math" // Reference imports to suppress errors if they are not otherwise used. var _ = proto.Marshal -var _ = fmt.Errorf var _ = math.Inf type Point struct { - Name *string `protobuf:"bytes,1,req,name=Name" json:"Name,omitempty"` - Tags *string `protobuf:"bytes,2,req,name=Tags" json:"Tags,omitempty"` - Time *int64 `protobuf:"varint,3,req,name=Time" json:"Time,omitempty"` - Nil *bool `protobuf:"varint,4,req,name=Nil" json:"Nil,omitempty"` - Aux []*Aux `protobuf:"bytes,5,rep,name=Aux" json:"Aux,omitempty"` - Aggregated *uint32 `protobuf:"varint,6,opt,name=Aggregated" json:"Aggregated,omitempty"` - FloatValue *float64 `protobuf:"fixed64,7,opt,name=FloatValue" json:"FloatValue,omitempty"` - IntegerValue *int64 `protobuf:"varint,8,opt,name=IntegerValue" json:"IntegerValue,omitempty"` - StringValue *string `protobuf:"bytes,9,opt,name=StringValue" json:"StringValue,omitempty"` - BooleanValue *bool `protobuf:"varint,10,opt,name=BooleanValue" json:"BooleanValue,omitempty"` + Name *string `protobuf:"bytes,1,req" json:"Name,omitempty"` + Tags *string `protobuf:"bytes,2,req" json:"Tags,omitempty"` + Time *int64 `protobuf:"varint,3,req" json:"Time,omitempty"` + Nil *bool `protobuf:"varint,4,req" json:"Nil,omitempty"` + Aux []*Aux `protobuf:"bytes,5,rep" json:"Aux,omitempty"` + Aggregated *uint32 `protobuf:"varint,6,opt" json:"Aggregated,omitempty"` + FloatValue *float64 `protobuf:"fixed64,7,opt" json:"FloatValue,omitempty"` + IntegerValue *int64 `protobuf:"varint,8,opt" json:"IntegerValue,omitempty"` + StringValue *string `protobuf:"bytes,9,opt" json:"StringValue,omitempty"` + BooleanValue *bool `protobuf:"varint,10,opt" json:"BooleanValue,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -118,11 +116,11 @@ func (m *Point) GetBooleanValue() bool { } type Aux struct { - DataType *int32 `protobuf:"varint,1,req,name=DataType" json:"DataType,omitempty"` - FloatValue *float64 `protobuf:"fixed64,2,opt,name=FloatValue" 
json:"FloatValue,omitempty"` - IntegerValue *int64 `protobuf:"varint,3,opt,name=IntegerValue" json:"IntegerValue,omitempty"` - StringValue *string `protobuf:"bytes,4,opt,name=StringValue" json:"StringValue,omitempty"` - BooleanValue *bool `protobuf:"varint,5,opt,name=BooleanValue" json:"BooleanValue,omitempty"` + DataType *int32 `protobuf:"varint,1,req" json:"DataType,omitempty"` + FloatValue *float64 `protobuf:"fixed64,2,opt" json:"FloatValue,omitempty"` + IntegerValue *int64 `protobuf:"varint,3,opt" json:"IntegerValue,omitempty"` + StringValue *string `protobuf:"bytes,4,opt" json:"StringValue,omitempty"` + BooleanValue *bool `protobuf:"varint,5,opt" json:"BooleanValue,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -166,22 +164,22 @@ func (m *Aux) GetBooleanValue() bool { } type IteratorOptions struct { - Expr *string `protobuf:"bytes,1,opt,name=Expr" json:"Expr,omitempty"` - Aux []string `protobuf:"bytes,2,rep,name=Aux" json:"Aux,omitempty"` - Sources []*Measurement `protobuf:"bytes,3,rep,name=Sources" json:"Sources,omitempty"` - Interval *Interval `protobuf:"bytes,4,opt,name=Interval" json:"Interval,omitempty"` - Dimensions []string `protobuf:"bytes,5,rep,name=Dimensions" json:"Dimensions,omitempty"` - Fill *int32 `protobuf:"varint,6,opt,name=Fill" json:"Fill,omitempty"` - FillValue *float64 `protobuf:"fixed64,7,opt,name=FillValue" json:"FillValue,omitempty"` - Condition *string `protobuf:"bytes,8,opt,name=Condition" json:"Condition,omitempty"` - StartTime *int64 `protobuf:"varint,9,opt,name=StartTime" json:"StartTime,omitempty"` - EndTime *int64 `protobuf:"varint,10,opt,name=EndTime" json:"EndTime,omitempty"` - Ascending *bool `protobuf:"varint,11,opt,name=Ascending" json:"Ascending,omitempty"` - Limit *int64 `protobuf:"varint,12,opt,name=Limit" json:"Limit,omitempty"` - Offset *int64 `protobuf:"varint,13,opt,name=Offset" json:"Offset,omitempty"` - SLimit *int64 `protobuf:"varint,14,opt,name=SLimit" json:"SLimit,omitempty"` - SOffset *int64 `protobuf:"varint,15,opt,name=SOffset" json:"SOffset,omitempty"` - Dedupe *bool `protobuf:"varint,16,opt,name=Dedupe" json:"Dedupe,omitempty"` + Expr *string `protobuf:"bytes,1,opt" json:"Expr,omitempty"` + Aux []string `protobuf:"bytes,2,rep" json:"Aux,omitempty"` + Sources []*Measurement `protobuf:"bytes,3,rep" json:"Sources,omitempty"` + Interval *Interval `protobuf:"bytes,4,opt" json:"Interval,omitempty"` + Dimensions []string `protobuf:"bytes,5,rep" json:"Dimensions,omitempty"` + Fill *int32 `protobuf:"varint,6,opt" json:"Fill,omitempty"` + FillValue *float64 `protobuf:"fixed64,7,opt" json:"FillValue,omitempty"` + Condition *string `protobuf:"bytes,8,opt" json:"Condition,omitempty"` + StartTime *int64 `protobuf:"varint,9,opt" json:"StartTime,omitempty"` + EndTime *int64 `protobuf:"varint,10,opt" json:"EndTime,omitempty"` + Ascending *bool `protobuf:"varint,11,opt" json:"Ascending,omitempty"` + Limit *int64 `protobuf:"varint,12,opt" json:"Limit,omitempty"` + Offset *int64 `protobuf:"varint,13,opt" json:"Offset,omitempty"` + SLimit *int64 `protobuf:"varint,14,opt" json:"SLimit,omitempty"` + SOffset *int64 `protobuf:"varint,15,opt" json:"SOffset,omitempty"` + Dedupe *bool `protobuf:"varint,16,opt" json:"Dedupe,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -302,7 +300,7 @@ func (m *IteratorOptions) GetDedupe() bool { } type Measurements struct { - Items []*Measurement `protobuf:"bytes,1,rep,name=Items" json:"Items,omitempty"` + Items []*Measurement `protobuf:"bytes,1,rep" json:"Items,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ 
-318,11 +316,11 @@ func (m *Measurements) GetItems() []*Measurement { } type Measurement struct { - Database *string `protobuf:"bytes,1,opt,name=Database" json:"Database,omitempty"` - RetentionPolicy *string `protobuf:"bytes,2,opt,name=RetentionPolicy" json:"RetentionPolicy,omitempty"` - Name *string `protobuf:"bytes,3,opt,name=Name" json:"Name,omitempty"` - Regex *string `protobuf:"bytes,4,opt,name=Regex" json:"Regex,omitempty"` - IsTarget *bool `protobuf:"varint,5,opt,name=IsTarget" json:"IsTarget,omitempty"` + Database *string `protobuf:"bytes,1,opt" json:"Database,omitempty"` + RetentionPolicy *string `protobuf:"bytes,2,opt" json:"RetentionPolicy,omitempty"` + Name *string `protobuf:"bytes,3,opt" json:"Name,omitempty"` + Regex *string `protobuf:"bytes,4,opt" json:"Regex,omitempty"` + IsTarget *bool `protobuf:"varint,5,opt" json:"IsTarget,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -366,8 +364,8 @@ func (m *Measurement) GetIsTarget() bool { } type Interval struct { - Duration *int64 `protobuf:"varint,1,opt,name=Duration" json:"Duration,omitempty"` - Offset *int64 `protobuf:"varint,2,opt,name=Offset" json:"Offset,omitempty"` + Duration *int64 `protobuf:"varint,1,opt" json:"Duration,omitempty"` + Offset *int64 `protobuf:"varint,2,opt" json:"Offset,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -390,9 +388,9 @@ func (m *Interval) GetOffset() int64 { } type Series struct { - Name *string `protobuf:"bytes,1,opt,name=Name" json:"Name,omitempty"` - Tags []byte `protobuf:"bytes,2,opt,name=Tags" json:"Tags,omitempty"` - Aux []uint32 `protobuf:"varint,3,rep,name=Aux" json:"Aux,omitempty"` + Name *string `protobuf:"bytes,1,opt" json:"Name,omitempty"` + Tags []byte `protobuf:"bytes,2,opt" json:"Tags,omitempty"` + Aux []uint32 `protobuf:"varint,3,rep" json:"Aux,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -422,7 +420,7 @@ func (m *Series) GetAux() []uint32 { } type SeriesList struct { - Items []*Series `protobuf:"bytes,1,rep,name=Items" json:"Items,omitempty"` + Items []*Series `protobuf:"bytes,1,rep" json:"Items,omitempty"` XXX_unrecognized []byte `json:"-"` } @@ -438,12 +436,4 @@ func (m *SeriesList) GetItems() []*Series { } func init() { - proto.RegisterType((*Point)(nil), "internal.Point") - proto.RegisterType((*Aux)(nil), "internal.Aux") - proto.RegisterType((*IteratorOptions)(nil), "internal.IteratorOptions") - proto.RegisterType((*Measurements)(nil), "internal.Measurements") - proto.RegisterType((*Measurement)(nil), "internal.Measurement") - proto.RegisterType((*Interval)(nil), "internal.Interval") - proto.RegisterType((*Series)(nil), "internal.Series") - proto.RegisterType((*SeriesList)(nil), "internal.SeriesList") } diff --git a/influxql/iterator.gen.go b/influxql/iterator.gen.go index a19ba345812..27544076356 100644 --- a/influxql/iterator.gen.go +++ b/influxql/iterator.gen.go @@ -612,6 +612,10 @@ func (itr *floatAuxIterator) SeriesKeys(opt IteratorOptions) (SeriesList, error) return nil, errors.New("not implemented") } +func (itr *floatAuxIterator) ExpandSources(sources Sources) (Sources, error) { + return nil, errors.New("not implemented") +} + func (itr *floatAuxIterator) stream() { for { // Read next point. @@ -1750,6 +1754,10 @@ func (itr *integerAuxIterator) SeriesKeys(opt IteratorOptions) (SeriesList, erro return nil, errors.New("not implemented") } +func (itr *integerAuxIterator) ExpandSources(sources Sources) (Sources, error) { + return nil, errors.New("not implemented") +} + func (itr *integerAuxIterator) stream() { for { // Read next point. 
@@ -2888,6 +2896,10 @@ func (itr *stringAuxIterator) SeriesKeys(opt IteratorOptions) (SeriesList, error return nil, errors.New("not implemented") } +func (itr *stringAuxIterator) ExpandSources(sources Sources) (Sources, error) { + return nil, errors.New("not implemented") +} + func (itr *stringAuxIterator) stream() { for { // Read next point. @@ -4026,6 +4038,10 @@ func (itr *booleanAuxIterator) SeriesKeys(opt IteratorOptions) (SeriesList, erro return nil, errors.New("not implemented") } +func (itr *booleanAuxIterator) ExpandSources(sources Sources) (Sources, error) { + return nil, errors.New("not implemented") +} + func (itr *booleanAuxIterator) stream() { for { // Read next point. diff --git a/influxql/iterator.gen.go.tmpl b/influxql/iterator.gen.go.tmpl index 661e489300a..b4d72f2166b 100644 --- a/influxql/iterator.gen.go.tmpl +++ b/influxql/iterator.gen.go.tmpl @@ -611,7 +611,11 @@ func (itr *{{$k.name}}AuxIterator) SeriesKeys(opt IteratorOptions) (SeriesList, return nil, errors.New("not implemented") } -func (itr *{{$k.name}}AuxIterator) stream() { +func (itr *{{.name}}AuxIterator) ExpandSources(sources Sources) (Sources, error) { + return nil, errors.New("not implemented") +} + +func (itr *{{.name}}AuxIterator) stream() { for { // Read next point. p := itr.input.Next() diff --git a/influxql/iterator.go b/influxql/iterator.go index cee4cc3ae6c..b2bdc44482d 100644 --- a/influxql/iterator.go +++ b/influxql/iterator.go @@ -444,6 +444,9 @@ type IteratorCreator interface { // Returns the series keys that will be returned by this iterator. SeriesKeys(opt IteratorOptions) (SeriesList, error) + + // Expands regex sources to all matching sources. + ExpandSources(sources Sources) (Sources, error) } // IteratorCreators represents a list of iterator creators. @@ -544,6 +547,42 @@ func (a IteratorCreators) SeriesKeys(opt IteratorOptions) (SeriesList, error) { return SeriesList(seriesList), nil } +// ExpandSources expands sources across all iterator creators and returns a unique result. +func (a IteratorCreators) ExpandSources(sources Sources) (Sources, error) { + m := make(map[string]Source) + + for _, ic := range a { + expanded, err := ic.ExpandSources(sources) + if err != nil { + return nil, err + } + + for _, src := range expanded { + switch src := src.(type) { + case *Measurement: + m[src.String()] = src + default: + return nil, fmt.Errorf("IteratorCreators.ExpandSources: unsupported source type: %T", src) + } + } + } + + // Convert set to sorted slice. + names := make([]string, 0, len(m)) + for name := range m { + names = append(names, name) + } + sort.Strings(names) + + // Convert set to a list of Sources. + sorted := make(Sources, 0, len(m)) + for _, name := range names { + sorted = append(sorted, m[name]) + } + + return sorted, nil +} + // IteratorOptions is an object passed to CreateIterator to specify creation options. type IteratorOptions struct { // Expression to iterate for. 
diff --git a/influxql/iterator_test.go b/influxql/iterator_test.go index 668d7e47df4..5d9ca71d3dd 100644 --- a/influxql/iterator_test.go +++ b/influxql/iterator_test.go @@ -959,6 +959,7 @@ type IteratorCreator struct { CreateIteratorFn func(opt influxql.IteratorOptions) (influxql.Iterator, error) FieldDimensionsFn func(sources influxql.Sources) (fields, dimensions map[string]struct{}, err error) SeriesKeysFn func(opt influxql.IteratorOptions) (influxql.SeriesList, error) + ExpandSourcesFn func(sources influxql.Sources) (influxql.Sources, error) } func (ic *IteratorCreator) CreateIterator(opt influxql.IteratorOptions) (influxql.Iterator, error) { @@ -1010,6 +1011,10 @@ func (ic *IteratorCreator) SeriesKeys(opt influxql.IteratorOptions) (influxql.Se return influxql.SeriesList(seriesList), nil } +func (ic *IteratorCreator) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { + return ic.ExpandSourcesFn(sources) +} + // Test implementation of influxql.FloatIterator type FloatIterator struct { Points []influxql.FloatPoint diff --git a/tsdb/shard.go b/tsdb/shard.go index 3b9b58d5c3b..8f236f69480 100644 --- a/tsdb/shard.go +++ b/tsdb/shard.go @@ -491,6 +491,53 @@ func (s *Shard) SeriesKeys(opt influxql.IteratorOptions) (influxql.SeriesList, e return s.engine.SeriesKeys(opt) } +// ExpandSources expands regex sources and removes duplicates. +// NOTE: sources must be normalized (db and rp set) before calling this function. +func (s *Shard) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { + // Use a map as a set to prevent duplicates. + set := map[string]influxql.Source{} + + // Iterate all sources, expanding regexes when they're found. + for _, source := range sources { + switch src := source.(type) { + case *influxql.Measurement: + // Add non-regex measurements directly to the set. + if src.Regex == nil { + set[src.String()] = src + continue + } + + // Loop over matching measurements. + for _, m := range s.index.MeasurementsByRegex(src.Regex.Val) { + other := &influxql.Measurement{ + Database: src.Database, + RetentionPolicy: src.RetentionPolicy, + Name: m.Name, + } + set[other.String()] = other + } + + default: + return nil, fmt.Errorf("expandSources: unsupported source type: %T", source) + } + } + + // Convert set to sorted slice. + names := make([]string, 0, len(set)) + for name := range set { + names = append(names, name) + } + sort.Strings(names) + + // Convert set to a list of Sources. + expanded := make(influxql.Sources, 0, len(set)) + for _, name := range names { + expanded = append(expanded, set[name]) + } + + return expanded, nil +} + // Shards represents a sortable list of shards. 
type Shards []*Shard @@ -844,6 +891,9 @@ func (ic *shardIteratorCreator) FieldDimensions(sources influxql.Sources) (field func (ic *shardIteratorCreator) SeriesKeys(opt influxql.IteratorOptions) (influxql.SeriesList, error) { return ic.sh.SeriesKeys(opt) } +func (ic *shardIteratorCreator) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { + return ic.sh.ExpandSources(sources) +} func NewFieldKeysIterator(sh *Shard, opt influxql.IteratorOptions) (influxql.Iterator, error) { fn := func(m *Measurement) []string { diff --git a/tsdb/store.go b/tsdb/store.go index 51266de99d3..eb67902e95d 100644 --- a/tsdb/store.go +++ b/tsdb/store.go @@ -486,6 +486,15 @@ func (s *Store) ShardRelativePath(id uint64) (string, error) { // DeleteSeries loops through the local shards and deletes the series data and metadata for the passed in series keys func (s *Store) DeleteSeries(database string, sources []influxql.Source, condition influxql.Expr) error { + // Expand regex expressions in the FROM clause. + a, err := s.ExpandSources(sources) + if err != nil { + return err + } else if sources != nil && len(sources) != 0 && len(a) == 0 { + return nil + } + sources = a + s.mu.RLock() defer s.mu.RUnlock() @@ -495,15 +504,6 @@ func (s *Store) DeleteSeries(database string, sources []influxql.Source, conditi return nil } - // Expand regex expressions in the FROM clause. - a, err := s.expandSources(sources) - if err != nil { - return err - } else if sources != nil && len(sources) != 0 && len(a) == 0 { - return nil - } - sources = a - measurements, err := measurementsFromSourcesOrDB(db, sources...) if err != nil { return err @@ -566,63 +566,21 @@ func (s *Store) deleteSeries(database string, seriesKeys []string) error { return nil } -// ExpandSources expands regex sources and removes duplicates. -// NOTE: sources must be normalized (db and rp set) before calling this function. +// ExpandSources expands sources against all local shards. func (s *Store) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { - s.mu.RLock() - defer s.mu.RUnlock() - return s.expandSources(sources) + return s.IteratorCreators().ExpandSources(sources) } -func (s *Store) expandSources(sources influxql.Sources) (influxql.Sources, error) { - // Use a map as a set to prevent duplicates. - set := map[string]influxql.Source{} - - // Iterate all sources, expanding regexes when they're found. - for _, source := range sources { - switch src := source.(type) { - case *influxql.Measurement: - // Add non-regex measurements directly to the set. - if src.Regex == nil { - set[src.String()] = src - continue - } - - // Lookup the database. - db := s.databaseIndexes[src.Database] - if db == nil { - return nil, nil - } - - // Loop over matching measurements. - for _, m := range db.MeasurementsByRegex(src.Regex.Val) { - other := &influxql.Measurement{ - Database: src.Database, - RetentionPolicy: src.RetentionPolicy, - Name: m.Name, - } - set[other.String()] = other - } - - default: - return nil, fmt.Errorf("expandSources: unsupported source type: %T", source) - } - } - - // Convert set to sorted slice. - names := make([]string, 0, len(set)) - for name := range set { - names = append(names, name) - } - sort.Strings(names) +// IteratorCreators returns a set of all local shards as iterator creators. +func (s *Store) IteratorCreators() influxql.IteratorCreators { + s.mu.Lock() + defer s.mu.Unlock() - // Convert set to a list of Sources. 
- expanded := make(influxql.Sources, 0, len(set)) - for _, name := range names { - expanded = append(expanded, set[name]) + a := make(influxql.IteratorCreators, 0, len(s.shards)) + for _, sh := range s.shards { + a = append(a, sh) } - - return expanded, nil + return a } // WriteToShard writes a list of points to a shard identified by its ID. From 45dfa3623083f9354cc16a640735df9734c25f0a Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Thu, 25 Feb 2016 11:17:59 +0000 Subject: [PATCH 05/14] Add default port --- services/httpd/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/httpd/config.go b/services/httpd/config.go index d730a69e9cd..c17507cd72a 100644 --- a/services/httpd/config.go +++ b/services/httpd/config.go @@ -1,5 +1,8 @@ package httpd +// DefaultBindAddress is the default address to bind to. +const DefaultBindAddress = ":8086" + // Config represents a configuration for a HTTP service. type Config struct { Enabled bool `toml:"enabled"` From e96185f993adf60b17aef6321b546ba4f350ba76 Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Fri, 4 Mar 2016 11:01:41 -0700 Subject: [PATCH 06/14] add support for remote expansion of regex This commit moves the `tsdb.Store.ExpandSources()` function onto the `influxql.IteratorCreator` and provides support for issuing source expansion across a cluster. --- influxql/iterator.gen.go.tmpl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/influxql/iterator.gen.go.tmpl b/influxql/iterator.gen.go.tmpl index b4d72f2166b..3f9f1c7e77d 100644 --- a/influxql/iterator.gen.go.tmpl +++ b/influxql/iterator.gen.go.tmpl @@ -615,6 +615,10 @@ func (itr *{{.name}}AuxIterator) ExpandSources(sources Sources) (Sources, error) return nil, errors.New("not implemented") } +func (itr *{{.name}}AuxIterator) ExpandSources(sources Sources) (Sources, error) { + return nil, errors.New("not implemented") +} + func (itr *{{.name}}AuxIterator) stream() { for { // Read next point. From 27cfaa4b7a517364556cdd1a7225cab513784bc6 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Tue, 8 Mar 2016 13:59:33 -0600 Subject: [PATCH 07/14] in memory meta, single node configs, etc. 
--- cluster/meta_client.go | 5 - cluster/meta_executor.go | 171 --- cluster/meta_executor_test.go | 121 -- cluster/points_writer.go | 171 +-- cluster/points_writer_test.go | 108 +- cluster/query_executor.go | 264 +---- cluster/query_executor_test.go | 69 -- cluster/rpc.go | 7 +- cluster/service.go | 69 +- cluster/shard_writer.go | 191 ---- cluster/shard_writer_test.go | 224 ---- cmd/influx/cli/cli.go | 4 +- cmd/influxd/restore/restore.go | 36 +- cmd/influxd/run/backup_restore_test.go | 17 +- cmd/influxd/run/command.go | 21 - cmd/influxd/run/config.go | 41 +- cmd/influxd/run/server.go | 348 ++---- cmd/influxd/run/server_helpers_test.go | 33 +- cmd/influxd/run/server_test.go | 183 ++- models/consistency.go | 46 + services/collectd/service.go | 7 +- services/graphite/service.go | 28 +- services/hh/config.go | 68 -- services/hh/config_test.go | 73 -- services/hh/doc.go | 5 - services/hh/limiter.go | 61 - services/hh/limiter_test.go | 47 - services/hh/node_processor.go | 295 ----- services/hh/node_processor_test.go | 155 --- services/hh/queue.go | 710 ------------ services/hh/queue_test.go | 327 ------ services/hh/service.go | 275 ----- services/httpd/handler.go | 48 +- services/httpd/handler_test.go | 14 - services/meta/client.go | 1106 ++++++------------ services/meta/client_test.go | 776 +++++++++++++ services/meta/config.go | 92 +- services/meta/config_test.go | 21 +- services/meta/data.go | 331 +----- services/meta/handler.go | 481 -------- services/meta/raft_state.go | 352 ------ services/meta/service.go | 210 ---- services/meta/service_test.go | 1461 ------------------------ services/meta/store.go | 450 -------- services/meta/store_fsm.go | 654 ----------- services/opentsdb/handler.go | 12 +- services/opentsdb/service.go | 55 +- services/udp/service.go | 7 +- tsdb/config.go | 6 +- tsdb/config_test.go | 4 - 50 files changed, 1546 insertions(+), 8714 deletions(-) delete mode 100644 cluster/meta_executor.go delete mode 100644 cluster/meta_executor_test.go delete mode 100644 cluster/shard_writer.go delete mode 100644 cluster/shard_writer_test.go create mode 100644 models/consistency.go delete mode 100644 services/hh/config.go delete mode 100644 services/hh/config_test.go delete mode 100644 services/hh/doc.go delete mode 100644 services/hh/limiter.go delete mode 100644 services/hh/limiter_test.go delete mode 100644 services/hh/node_processor.go delete mode 100644 services/hh/node_processor_test.go delete mode 100644 services/hh/queue.go delete mode 100644 services/hh/queue_test.go delete mode 100644 services/hh/service.go create mode 100644 services/meta/client_test.go delete mode 100644 services/meta/handler.go delete mode 100644 services/meta/raft_state.go delete mode 100644 services/meta/service.go delete mode 100644 services/meta/service_test.go delete mode 100644 services/meta/store.go delete mode 100644 services/meta/store_fsm.go diff --git a/cluster/meta_client.go b/cluster/meta_client.go index dfdb7a09d7c..f72ec3ca76e 100644 --- a/cluster/meta_client.go +++ b/cluster/meta_client.go @@ -17,16 +17,11 @@ type MetaClient interface { CreateUser(name, password string, admin bool) (*meta.UserInfo, error) Database(name string) (*meta.DatabaseInfo, error) Databases() ([]meta.DatabaseInfo, error) - DataNode(id uint64) (*meta.NodeInfo, error) - DataNodes() ([]meta.NodeInfo, error) - DeleteDataNode(id uint64) error - DeleteMetaNode(id uint64) error DropContinuousQuery(database, name string) error DropDatabase(name string) error DropRetentionPolicy(database, name string) error 
DropSubscription(database, rp, name string) error DropUser(name string) error - MetaNodes() ([]meta.NodeInfo, error) RetentionPolicy(database, name string) (rpi *meta.RetentionPolicyInfo, err error) SetAdminPrivilege(username string, admin bool) error SetDefaultRetentionPolicy(database, name string) error diff --git a/cluster/meta_executor.go b/cluster/meta_executor.go deleted file mode 100644 index 3433b0fa61f..00000000000 --- a/cluster/meta_executor.go +++ /dev/null @@ -1,171 +0,0 @@ -package cluster - -import ( - "fmt" - "log" - "net" - "os" - "sync" - "time" - - "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/influxql" - "github.com/influxdata/influxdb/services/meta" -) - -const ( - metaExecutorWriteTimeout = 5 * time.Second - metaExecutorMaxWriteConnections = 10 -) - -// MetaExecutor executes meta queries on all data nodes. -type MetaExecutor struct { - mu sync.RWMutex - timeout time.Duration - pool *clientPool - maxConnections int - Logger *log.Logger - Node *influxdb.Node - - nodeExecutor interface { - executeOnNode(stmt influxql.Statement, database string, node *meta.NodeInfo) error - } - - MetaClient interface { - DataNode(id uint64) (ni *meta.NodeInfo, err error) - DataNodes() ([]meta.NodeInfo, error) - } -} - -// NewMetaExecutor returns a new initialized *MetaExecutor. -func NewMetaExecutor() *MetaExecutor { - m := &MetaExecutor{ - timeout: metaExecutorWriteTimeout, - pool: newClientPool(), - maxConnections: metaExecutorMaxWriteConnections, - Logger: log.New(os.Stderr, "[meta-executor] ", log.LstdFlags), - } - m.nodeExecutor = m - - return m -} - -// remoteNodeError wraps an error with context about a node that -// returned the error. -type remoteNodeError struct { - id uint64 - err error -} - -func (e remoteNodeError) Error() string { - return fmt.Sprintf("partial success, node %d may be down (%s)", e.id, e.err) -} - -// ExecuteStatement executes a single InfluxQL statement on all nodes in the cluster concurrently. -func (m *MetaExecutor) ExecuteStatement(stmt influxql.Statement, database string) error { - // Get a list of all nodes the query needs to be executed on. - nodes, err := m.MetaClient.DataNodes() - if err != nil { - return err - } else if len(nodes) < 1 { - return nil - } - - // Start a goroutine to execute the statement on each of the remote nodes. - var wg sync.WaitGroup - errs := make(chan error, len(nodes)-1) - for _, node := range nodes { - if m.Node.ID == node.ID { - continue // Don't execute statement on ourselves. - } - - wg.Add(1) - go func(node meta.NodeInfo) { - defer wg.Done() - if err := m.nodeExecutor.executeOnNode(stmt, database, &node); err != nil { - errs <- remoteNodeError{id: node.ID, err: err} - } - }(node) - } - - // Wait on n-1 nodes to execute the statement and respond. - wg.Wait() - - select { - case err = <-errs: - return err - default: - return nil - } -} - -// executeOnNode executes a single InfluxQL statement on a single node. -func (m *MetaExecutor) executeOnNode(stmt influxql.Statement, database string, node *meta.NodeInfo) error { - // We're executing on a remote node so establish a connection. - c, err := m.dial(node.ID) - if err != nil { - return err - } - - conn, ok := c.(*pooledConn) - if !ok { - panic("wrong connection type in MetaExecutor") - } - // Return connection to pool by "closing" it. - defer conn.Close() - - // Build RPC request. - var request ExecuteStatementRequest - request.SetStatement(stmt.String()) - request.SetDatabase(database) - - // Marshal into protocol buffer. 
- buf, err := request.MarshalBinary() - if err != nil { - return err - } - - // Send request. - conn.SetWriteDeadline(time.Now().Add(m.timeout)) - if err := WriteTLV(conn, executeStatementRequestMessage, buf); err != nil { - conn.MarkUnusable() - return err - } - - // Read the response. - conn.SetReadDeadline(time.Now().Add(m.timeout)) - _, buf, err = ReadTLV(conn) - if err != nil { - conn.MarkUnusable() - return err - } - - // Unmarshal response. - var response ExecuteStatementResponse - if err := response.UnmarshalBinary(buf); err != nil { - return err - } - - if response.Code() != 0 { - return fmt.Errorf("error code %d: %s", response.Code(), response.Message()) - } - - return nil -} - -// dial returns a connection to a single node in the cluster. -func (m *MetaExecutor) dial(nodeID uint64) (net.Conn, error) { - // If we don't have a connection pool for that addr yet, create one - _, ok := m.pool.getPool(nodeID) - if !ok { - factory := &connFactory{nodeID: nodeID, clientPool: m.pool, timeout: m.timeout} - factory.metaClient = m.MetaClient - - p, err := NewBoundedPool(1, m.maxConnections, m.timeout, factory.dial) - if err != nil { - return nil, err - } - m.pool.setPool(nodeID, p) - } - return m.pool.conn(nodeID) -} diff --git a/cluster/meta_executor_test.go b/cluster/meta_executor_test.go deleted file mode 100644 index 32eda8e6e07..00000000000 --- a/cluster/meta_executor_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package cluster - -import ( - "fmt" - "sync" - "testing" - - "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/influxql" - "github.com/influxdata/influxdb/services/meta" -) - -func Test_ExecuteStatement(t *testing.T) { - numOfNodes := 3 - - mock := newMockExecutor() - // Expect each statement twice because we have 3 nodes, 2 of which - // are remote and should be executed on. - mock.expect("DROP RETENTION POLICY rp0 on foo") - mock.expect("DROP RETENTION POLICY rp0 on foo") - mock.expect("DROP DATABASE foo") - mock.expect("DROP DATABASE foo") - - e := NewMetaExecutor() - e.MetaClient = newMockMetaClient(numOfNodes) - e.Node = influxdb.NewNode("/tmp/node") - e.Node.ID = 1 - // Replace MetaExecutor's nodeExecutor with our mock. 
- e.nodeExecutor = mock - - if err := e.ExecuteStatement(mustParseStatement("DROP RETENTION POLICY rp0 on foo"), "foo"); err != nil { - t.Fatal(err) - } - if err := e.ExecuteStatement(mustParseStatement("DROP DATABASE foo"), "foo"); err != nil { - t.Fatal(err) - } - - if err := mock.done(); err != nil { - t.Fatal(err) - } -} - -type mockExecutor struct { - mu sync.Mutex - expectStatements []influxql.Statement - idx int -} - -func newMockExecutor() *mockExecutor { - return &mockExecutor{ - idx: -1, - } -} - -func (e *mockExecutor) expect(stmt string) { - s := mustParseStatement(stmt) - e.expectStatements = append(e.expectStatements, s) -} - -func (e *mockExecutor) done() error { - if e.idx+1 != len(e.expectStatements) { - return fmt.Errorf("expected %d mockExecuteOnNode calls, got %d", len(e.expectStatements), e.idx+1) - } - return nil -} - -func (e *mockExecutor) executeOnNode(stmt influxql.Statement, database string, node *meta.NodeInfo) error { - e.mu.Lock() - defer e.mu.Unlock() - - e.idx++ - - if e.idx > len(e.expectStatements)-1 { - return fmt.Errorf("extra statement: %s", stmt.String()) - } - - if e.expectStatements[e.idx].String() != stmt.String() { - return fmt.Errorf("unexpected statement:\n\texp: %s\n\tgot: %s\n", e.expectStatements[e.idx].String(), stmt.String()) - } - return nil -} - -func mustParseStatement(stmt string) influxql.Statement { - s, err := influxql.ParseStatement(stmt) - if err != nil { - panic(err) - } - return s -} - -type mockMetaClient struct { - nodes []meta.NodeInfo -} - -func newMockMetaClient(nodeCnt int) *mockMetaClient { - c := &mockMetaClient{} - for i := 0; i < nodeCnt; i++ { - n := meta.NodeInfo{ - ID: uint64(i + 1), - Host: fmt.Sprintf("localhost:%d", 8000+i), - TCPHost: fmt.Sprintf("localhost:%d", 9000+i), - } - c.nodes = append(c.nodes, n) - } - - return c -} - -func (c *mockMetaClient) DataNode(id uint64) (ni *meta.NodeInfo, err error) { - for i := 0; i < len(c.nodes); i++ { - if c.nodes[i].ID == id { - ni = &c.nodes[i] - return - } - } - return -} - -func (c *mockMetaClient) DataNodes() ([]meta.NodeInfo, error) { - return c.nodes, nil -} diff --git a/cluster/points_writer.go b/cluster/points_writer.go index da21cb51e37..36438bc7374 100644 --- a/cluster/points_writer.go +++ b/cluster/points_writer.go @@ -3,10 +3,8 @@ package cluster import ( "errors" "expvar" - "fmt" "log" "os" - "strings" "sync" "time" @@ -16,10 +14,6 @@ import ( "github.com/influxdata/influxdb/tsdb" ) -// ConsistencyLevel represent a required replication criteria before a write can -// be returned as successful -type ConsistencyLevel int - // The statistics generated by the "write" mdoule const ( statWriteReq = "req" @@ -35,20 +29,6 @@ const ( statSubWriteDrop = "subWriteDrop" ) -const ( - // ConsistencyLevelAny allows for hinted hand off, potentially no write happened yet - ConsistencyLevelAny ConsistencyLevel = iota - - // ConsistencyLevelOne requires at least one data node acknowledged a write - ConsistencyLevelOne - - // ConsistencyLevelQuorum requires a quorum of data nodes to acknowledge a write - ConsistencyLevelQuorum - - // ConsistencyLevelAll requires all data nodes to acknowledge a write - ConsistencyLevelAll -) - var ( // ErrTimeout is returned when a write times out. ErrTimeout = errors.New("timeout") @@ -59,28 +39,8 @@ var ( // ErrWriteFailed is returned when no writes succeeded. ErrWriteFailed = errors.New("write failed") - - // ErrInvalidConsistencyLevel is returned when parsing the string version - // of a consistency level. 
- ErrInvalidConsistencyLevel = errors.New("invalid consistency level") ) -// ParseConsistencyLevel converts a consistency level string to the corresponding ConsistencyLevel const -func ParseConsistencyLevel(level string) (ConsistencyLevel, error) { - switch strings.ToLower(level) { - case "any": - return ConsistencyLevelAny, nil - case "one": - return ConsistencyLevelOne, nil - case "quorum": - return ConsistencyLevelQuorum, nil - case "all": - return ConsistencyLevelAll, nil - default: - return 0, ErrInvalidConsistencyLevel - } -} - // PointsWriter handles writes across multiple local and remote data nodes. type PointsWriter struct { mu sync.RWMutex @@ -222,10 +182,9 @@ func (w *PointsWriter) MapShards(wp *WritePointsRequest) (*ShardMapping, error) // a cluster structure for information. This is to avoid a circular dependency func (w *PointsWriter) WritePointsInto(p *IntoWriteRequest) error { req := WritePointsRequest{ - Database: p.Database, - RetentionPolicy: p.RetentionPolicy, - ConsistencyLevel: ConsistencyLevelAny, - Points: p.Points, + Database: p.Database, + RetentionPolicy: p.RetentionPolicy, + Points: p.Points, } return w.WritePoints(&req) } @@ -255,7 +214,7 @@ func (w *PointsWriter) WritePoints(p *WritePointsRequest) error { ch := make(chan error, len(shardMappings.Points)) for shardID, points := range shardMappings.Points { go func(shard *meta.ShardInfo, database, retentionPolicy string, points []models.Point) { - ch <- w.writeToShard(shard, p.Database, p.RetentionPolicy, p.ConsistencyLevel, points) + ch <- w.writeToShard(shard, p.Database, p.RetentionPolicy, points) }(shardMappings.Shards[shardID], p.Database, p.RetentionPolicy, points) } @@ -288,112 +247,28 @@ func (w *PointsWriter) WritePoints(p *WritePointsRequest) error { return nil } -// writeToShards writes points to a shard and ensures a write consistency level has been met. If the write -// partially succeeds, ErrPartialWrite is returned. 
-func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPolicy string, - consistency ConsistencyLevel, points []models.Point) error { - // The required number of writes to achieve the requested consistency level - required := len(shard.Owners) - switch consistency { - case ConsistencyLevelAny, ConsistencyLevelOne: - required = 1 - case ConsistencyLevelQuorum: - required = required/2 + 1 - } - - // response channel for each shard writer go routine - type AsyncWriteResult struct { - Owner meta.ShardOwner - Err error - } - ch := make(chan *AsyncWriteResult, len(shard.Owners)) - - for _, owner := range shard.Owners { - go func(shardID uint64, owner meta.ShardOwner, points []models.Point) { - if w.Node.ID == owner.NodeID { - w.statMap.Add(statPointWriteReqLocal, int64(len(points))) - - err := w.TSDBStore.WriteToShard(shardID, points) - // If we've written to shard that should exist on the current node, but the store has - // not actually created this shard, tell it to create it and retry the write - if err == tsdb.ErrShardNotFound { - err = w.TSDBStore.CreateShard(database, retentionPolicy, shardID) - if err != nil { - ch <- &AsyncWriteResult{owner, err} - return - } - err = w.TSDBStore.WriteToShard(shardID, points) - } - ch <- &AsyncWriteResult{owner, err} - return - } - - w.statMap.Add(statPointWriteReqRemote, int64(len(points))) - err := w.ShardWriter.WriteShard(shardID, owner.NodeID, points) - if err != nil && tsdb.IsRetryable(err) { - // The remote write failed so queue it via hinted handoff - w.statMap.Add(statWritePointReqHH, int64(len(points))) - hherr := w.HintedHandoff.WriteShard(shardID, owner.NodeID, points) - if hherr != nil { - ch <- &AsyncWriteResult{owner, hherr} - return - } - - // If the write consistency level is ANY, then a successful hinted handoff can - // be considered a successful write so send nil to the response channel - // otherwise, let the original error propagate to the response channel - if hherr == nil && consistency == ConsistencyLevelAny { - ch <- &AsyncWriteResult{owner, nil} - return - } - } - ch <- &AsyncWriteResult{owner, err} +// writeToShards writes points to a shard. 
+func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPolicy string, points []models.Point) error { + w.statMap.Add(statPointWriteReqLocal, int64(len(points))) - }(shard.ID, owner, points) - } - - var wrote int - timeout := time.After(w.WriteTimeout) - var writeError error - for range shard.Owners { - select { - case <-w.closing: - return ErrWriteFailed - case <-timeout: - w.statMap.Add(statWriteTimeout, 1) - // return timeout error to caller - return ErrTimeout - case result := <-ch: - // If the write returned an error, continue to the next response - if result.Err != nil { - w.statMap.Add(statWriteErr, 1) - w.Logger.Printf("write failed for shard %d on node %d: %v", shard.ID, result.Owner.NodeID, result.Err) - - // Keep track of the first error we see to return back to the client - if writeError == nil { - writeError = result.Err - } - continue - } - - wrote++ - - // We wrote the required consistency level - if wrote >= required { - w.statMap.Add(statWriteOK, 1) - return nil - } + err := w.TSDBStore.WriteToShard(shard.ID, points) + // If we've written to shard that should exist on the current node, but the store has + // not actually created this shard, tell it to create it and retry the write + if err == tsdb.ErrShardNotFound { + err = w.TSDBStore.CreateShard(database, retentionPolicy, shard.ID) + if err != nil { + w.Logger.Printf("write failed for shard %d: %v", shard.ID, err) + w.statMap.Add(statWriteErr, 1) + return err } } - - if wrote > 0 { - w.statMap.Add(statWritePartial, 1) - return ErrPartialWrite - } - - if writeError != nil { - return fmt.Errorf("write failed: %v", writeError) + err = w.TSDBStore.WriteToShard(shard.ID, points) + if err != nil { + w.Logger.Printf("write failed for shard %d: %v", shard.ID, err) + w.statMap.Add(statWriteErr, 1) + return err } - return ErrWriteFailed + w.statMap.Add(statWriteOK, 1) + return nil } diff --git a/cluster/points_writer_test.go b/cluster/points_writer_test.go index c0cc0f1fbfa..96086bdd05c 100644 --- a/cluster/points_writer_test.go +++ b/cluster/points_writer_test.go @@ -31,9 +31,8 @@ func TestPointsWriter_MapShards_One(t *testing.T) { c := cluster.PointsWriter{MetaClient: ms} pr := &cluster.WritePointsRequest{ - Database: "mydb", - RetentionPolicy: "myrp", - ConsistencyLevel: cluster.ConsistencyLevelOne, + Database: "mydb", + RetentionPolicy: "myrp", } pr.AddPoint("cpu", 1.0, time.Now(), nil) @@ -81,9 +80,8 @@ func TestPointsWriter_MapShards_Multiple(t *testing.T) { c := cluster.PointsWriter{MetaClient: ms} pr := &cluster.WritePointsRequest{ - Database: "mydb", - RetentionPolicy: "myrp", - ConsistencyLevel: cluster.ConsistencyLevelOne, + Database: "mydb", + RetentionPolicy: "myrp", } // Three points that range over the shardGroup duration (1h) and should map to two @@ -126,117 +124,24 @@ func TestPointsWriter_WritePoints(t *testing.T) { name string database string retentionPolicy string - consistency cluster.ConsistencyLevel // the responses returned by each shard write call. 
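// Illustrative sketch (not part of the original patch): the single-node
// writeToShard above now writes locally and, when the store reports
// tsdb.ErrShardNotFound, creates the shard and retries the write once. The
// standalone program below shows that create-then-retry pattern against a
// minimal in-memory store; shardStore, memStore, writeWithCreate, and the use
// of plain strings instead of models.Point are assumptions made for the
// sketch, not influxdb APIs.
package main

import (
	"errors"
	"fmt"
)

// errShardNotFound stands in for tsdb.ErrShardNotFound.
var errShardNotFound = errors.New("shard not found")

// shardStore is the minimal subset of a TSDB store the pattern needs.
type shardStore interface {
	WriteToShard(shardID uint64, points []string) error
	CreateShard(database, policy string, shardID uint64) error
}

// writeWithCreate writes points to a shard, lazily creating the shard the
// first time the store says it does not exist, then retrying the write once.
func writeWithCreate(s shardStore, db, rp string, shardID uint64, points []string) error {
	err := s.WriteToShard(shardID, points)
	if err == errShardNotFound {
		if err := s.CreateShard(db, rp, shardID); err != nil {
			return fmt.Errorf("create shard %d: %v", shardID, err)
		}
		err = s.WriteToShard(shardID, points)
	}
	return err
}

// memStore is a toy in-memory implementation used to exercise the pattern.
type memStore struct{ shards map[uint64][]string }

func (m *memStore) WriteToShard(id uint64, pts []string) error {
	if _, ok := m.shards[id]; !ok {
		return errShardNotFound
	}
	m.shards[id] = append(m.shards[id], pts...)
	return nil
}

func (m *memStore) CreateShard(db, rp string, id uint64) error {
	m.shards[id] = []string{}
	return nil
}

func main() {
	s := &memStore{shards: map[uint64][]string{}}
	if err := writeWithCreate(s, "mydb", "myrp", 1, []string{"cpu value=1"}); err != nil {
		fmt.Println("write failed:", err)
		return
	}
	fmt.Println("shard 1 now holds", len(s.shards[1]), "point(s)")
}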
node ID 1 = pos 0 err []error expErr error }{ - // Consistency one { name: "write one success", database: "mydb", retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelOne, err: []error{nil, nil, nil}, expErr: nil, }, - { - name: "write one error", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelOne, - err: []error{fmt.Errorf("a failure"), fmt.Errorf("a failure"), fmt.Errorf("a failure")}, - expErr: fmt.Errorf("write failed: a failure"), - }, - - // Consistency any - { - name: "write any success", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelAny, - err: []error{fmt.Errorf("a failure"), nil, fmt.Errorf("a failure")}, - expErr: nil, - }, - // Consistency all - { - name: "write all success", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelAll, - err: []error{nil, nil, nil}, - expErr: nil, - }, - { - name: "write all, 2/3, partial write", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelAll, - err: []error{nil, fmt.Errorf("a failure"), nil}, - expErr: cluster.ErrPartialWrite, - }, - { - name: "write all, 1/3 (failure)", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelAll, - err: []error{nil, fmt.Errorf("a failure"), fmt.Errorf("a failure")}, - expErr: cluster.ErrPartialWrite, - }, - - // Consistency quorum - { - name: "write quorum, 1/3 failure", - consistency: cluster.ConsistencyLevelQuorum, - database: "mydb", - retentionPolicy: "myrp", - err: []error{fmt.Errorf("a failure"), fmt.Errorf("a failure"), nil}, - expErr: cluster.ErrPartialWrite, - }, - { - name: "write quorum, 2/3 success", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelQuorum, - err: []error{nil, nil, fmt.Errorf("a failure")}, - expErr: nil, - }, - { - name: "write quorum, 3/3 success", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelQuorum, - err: []error{nil, nil, nil}, - expErr: nil, - }, - - // Error write error - { - name: "no writes succeed", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelOne, - err: []error{fmt.Errorf("a failure"), fmt.Errorf("a failure"), fmt.Errorf("a failure")}, - expErr: fmt.Errorf("write failed: a failure"), - }, - - // Hinted handoff w/ ANY - { - name: "hinted handoff write succeed", - database: "mydb", - retentionPolicy: "myrp", - consistency: cluster.ConsistencyLevelAny, - err: []error{fmt.Errorf("a failure"), fmt.Errorf("a failure"), fmt.Errorf("a failure")}, - expErr: nil, - }, // Write to non-existent database { name: "write to non-existent database", database: "doesnt_exist", retentionPolicy: "", - consistency: cluster.ConsistencyLevelAny, err: []error{nil, nil, nil}, expErr: fmt.Errorf("database not found: doesnt_exist"), }, @@ -245,9 +150,8 @@ func TestPointsWriter_WritePoints(t *testing.T) { for _, test := range tests { pr := &cluster.WritePointsRequest{ - Database: test.database, - RetentionPolicy: test.retentionPolicy, - ConsistencyLevel: test.consistency, + Database: test.database, + RetentionPolicy: test.retentionPolicy, } // Three points that range over the shardGroup duration (1h) and should map to two diff --git a/cluster/query_executor.go b/cluster/query_executor.go index e8e83928a18..42e22a75220 100644 --- a/cluster/query_executor.go +++ b/cluster/query_executor.go @@ -8,8 +8,6 @@ import ( "io" "io/ioutil" "log" - "math/rand" - "net" "sort" "strconv" "time" @@ -38,9 
+36,6 @@ type QueryExecutor struct { // Used for rewriting points back into system for SELECT INTO statements. PointsWriter *PointsWriter - // Used for executing meta statements on all data nodes. - MetaExecutor *MetaExecutor - // Remote execution timeout Timeout time.Duration @@ -149,7 +144,7 @@ func (e *QueryExecutor) executeQuery(query *influxql.Query, database string, chu case *influxql.DropRetentionPolicyStatement: err = e.executeDropRetentionPolicyStatement(stmt) case *influxql.DropServerStatement: - err = e.executeDropServerStatement(stmt) + err = influxql.ErrInvalidQuery case *influxql.DropSubscriptionStatement: err = e.executeDropSubscriptionStatement(stmt) case *influxql.DropUserStatement: @@ -173,7 +168,8 @@ func (e *QueryExecutor) executeQuery(query *influxql.Query, database string, chu case *influxql.ShowRetentionPoliciesStatement: rows, err = e.executeShowRetentionPoliciesStatement(stmt) case *influxql.ShowServersStatement: - rows, err = e.executeShowServersStatement(stmt) + // TODO: corylanou add this back for single node + err = influxql.ErrInvalidQuery case *influxql.ShowShardsStatement: rows, err = e.executeShowShardsStatement(stmt) case *influxql.ShowShardGroupsStatement: @@ -294,12 +290,7 @@ func (e *QueryExecutor) executeDropDatabaseStatement(stmt *influxql.DropDatabase } // Locally delete the datababse. - if err := e.TSDBStore.DeleteDatabase(stmt.Name); err != nil { - return err - } - - // Execute the statement on the other data nodes in the cluster. - return e.MetaExecutor.ExecuteStatement(stmt, "") + return e.TSDBStore.DeleteDatabase(stmt.Name) } func (e *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasurementStatement, database string) error { @@ -310,12 +301,7 @@ func (e *QueryExecutor) executeDropMeasurementStatement(stmt *influxql.DropMeasu } // Locally drop the measurement - if err := e.TSDBStore.DeleteMeasurement(database, stmt.Name); err != nil { - return err - } - - // Execute the statement on the other data nodes in the cluster. - return e.MetaExecutor.ExecuteStatement(stmt, database) + return e.TSDBStore.DeleteMeasurement(database, stmt.Name) } func (e *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStatement, database string) error { @@ -331,19 +317,7 @@ func (e *QueryExecutor) executeDropSeriesStatement(stmt *influxql.DropSeriesStat } // Locally drop the series. - if err := e.TSDBStore.DeleteSeries(database, stmt.Sources, stmt.Condition); err != nil { - return err - } - - // Execute the statement on the other data nodes in the cluster. - return e.MetaExecutor.ExecuteStatement(stmt, database) -} - -func (e *QueryExecutor) executeDropServerStatement(q *influxql.DropServerStatement) error { - if q.Meta { - return e.MetaClient.DeleteMetaNode(q.NodeID) - } - return e.MetaClient.DeleteDataNode(q.NodeID) + return e.TSDBStore.DeleteSeries(database, stmt.Sources, stmt.Condition) } func (e *QueryExecutor) executeDropRetentionPolicyStatement(stmt *influxql.DropRetentionPolicyStatement) error { @@ -352,12 +326,7 @@ func (e *QueryExecutor) executeDropRetentionPolicyStatement(stmt *influxql.DropR } // Locally drop the retention policy. - if err := e.TSDBStore.DeleteRetentionPolicy(stmt.Database, stmt.Name); err != nil { - return err - } - - // Execute the statement on the other data nodes in the cluster. 
- return e.MetaExecutor.ExecuteStatement(stmt, stmt.Database) + return e.TSDBStore.DeleteRetentionPolicy(stmt.Database, stmt.Name) } func (e *QueryExecutor) executeDropSubscriptionStatement(q *influxql.DropSubscriptionStatement) error { @@ -520,51 +489,15 @@ func (e *QueryExecutor) iteratorCreator(stmt *influxql.SelectStatement, opt *inf return nil, err } - // Map shards to nodes. - shardIDsByNodeID := make(map[uint64][]uint64) - for _, si := range shards { - // Always assign to local node if it has the shard. - // Otherwise randomly select a remote node. - var nodeID uint64 - if si.OwnedBy(e.Node.ID) { - nodeID = e.Node.ID - } else if len(si.Owners) > 0 { - nodeID = si.Owners[rand.Intn(len(si.Owners))].NodeID - } else { - // This should not occur but if the shard has no owners then - // we don't want this to panic by trying to randomly select a node. - continue - } - - // Otherwise assign it to a remote shard randomly. - shardIDsByNodeID[nodeID] = append(shardIDsByNodeID[nodeID], si.ID) - } - // Generate iterators for each node. ics := make([]influxql.IteratorCreator, 0) if err := func() error { - for nodeID, shardIDs := range shardIDsByNodeID { - // Sort shard IDs so we get more predicable execution. - sort.Sort(uint64Slice(shardIDs)) - - // Create iterator creators from TSDB if local. - if nodeID == e.Node.ID { - for _, shardID := range shardIDs { - ic := e.TSDBStore.ShardIteratorCreator(shardID) - if ic == nil { - continue - } - ics = append(ics, ic) - } + for _, shard := range shards { + ic := e.TSDBStore.ShardIteratorCreator(shard.ID) + if ic == nil { continue } - - // Otherwise create iterator creator remotely. - dialer := &NodeDialer{ - MetaClient: e.MetaClient, - Timeout: e.Timeout, - } - ics = append(ics, newRemoteIteratorCreator(dialer, nodeID, shardIDs)) + ics = append(ics, ic) } return nil @@ -667,32 +600,6 @@ func (e *QueryExecutor) executeShowRetentionPoliciesStatement(q *influxql.ShowRe return []*models.Row{row}, nil } -func (e *QueryExecutor) executeShowServersStatement(q *influxql.ShowServersStatement) (models.Rows, error) { - nis, err := e.MetaClient.DataNodes() - if err != nil { - return nil, err - } - - dataNodes := &models.Row{Columns: []string{"id", "http_addr", "tcp_addr"}} - dataNodes.Name = "data_nodes" - for _, ni := range nis { - dataNodes.Values = append(dataNodes.Values, []interface{}{ni.ID, ni.Host, ni.TCPHost}) - } - - nis, err = e.MetaClient.MetaNodes() - if err != nil { - return nil, err - } - - metaNodes := &models.Row{Columns: []string{"id", "http_addr", "tcp_addr"}} - metaNodes.Name = "meta_nodes" - for _, ni := range nis { - metaNodes.Values = append(metaNodes.Values, []interface{}{ni.ID, ni.Host, ni.TCPHost}) - } - - return []*models.Row{dataNodes, metaNodes}, nil -} - func (e *QueryExecutor) executeShowShardsStatement(stmt *influxql.ShowShardsStatement) (models.Rows, error) { dis, err := e.MetaClient.Databases() if err != nil { @@ -961,155 +868,6 @@ type IntoWriteRequest struct { Points []models.Point } -// remoteIteratorCreator creates iterators for remote shards. -type remoteIteratorCreator struct { - dialer *NodeDialer - nodeID uint64 - shardIDs []uint64 -} - -// newRemoteIteratorCreator returns a new instance of remoteIteratorCreator for a remote shard. -func newRemoteIteratorCreator(dialer *NodeDialer, nodeID uint64, shardIDs []uint64) *remoteIteratorCreator { - return &remoteIteratorCreator{ - dialer: dialer, - nodeID: nodeID, - shardIDs: shardIDs, - } -} - -// CreateIterator creates a remote streaming iterator. 
-func (ic *remoteIteratorCreator) CreateIterator(opt influxql.IteratorOptions) (influxql.Iterator, error) { - conn, err := ic.dialer.DialNode(ic.nodeID) - if err != nil { - return nil, err - } - - if err := func() error { - // Write request. - if err := EncodeTLV(conn, createIteratorRequestMessage, &CreateIteratorRequest{ - ShardIDs: ic.shardIDs, - Opt: opt, - }); err != nil { - return err - } - - // Read the response. - var resp CreateIteratorResponse - if _, err := DecodeTLV(conn, &resp); err != nil { - return err - } else if resp.Err != nil { - return err - } - - return nil - }(); err != nil { - conn.Close() - return nil, err - } - - return influxql.NewReaderIterator(conn) -} - -// FieldDimensions returns the unique fields and dimensions across a list of sources. -func (ic *remoteIteratorCreator) FieldDimensions(sources influxql.Sources) (fields, dimensions map[string]struct{}, err error) { - conn, err := ic.dialer.DialNode(ic.nodeID) - if err != nil { - return nil, nil, err - } - defer conn.Close() - - // Write request. - if err := EncodeTLV(conn, fieldDimensionsRequestMessage, &FieldDimensionsRequest{ - ShardIDs: ic.shardIDs, - Sources: sources, - }); err != nil { - return nil, nil, err - } - - // Read the response. - var resp FieldDimensionsResponse - if _, err := DecodeTLV(conn, &resp); err != nil { - return nil, nil, err - } - return resp.Fields, resp.Dimensions, resp.Err -} - -// SeriesKeys returns a list of series keys from the underlying shard. -func (ic *remoteIteratorCreator) SeriesKeys(opt influxql.IteratorOptions) (influxql.SeriesList, error) { - conn, err := ic.dialer.DialNode(ic.nodeID) - if err != nil { - return nil, err - } - defer conn.Close() - - // Write request. - if err := EncodeTLV(conn, seriesKeysRequestMessage, &SeriesKeysRequest{ - ShardIDs: ic.shardIDs, - Opt: opt, - }); err != nil { - return nil, err - } - - // Read the response. - var resp SeriesKeysResponse - if _, err := DecodeTLV(conn, &resp); err != nil { - return nil, err - } - return resp.SeriesList, resp.Err -} - -// ExpandSources expands regex sources on a remote iterator creator. -func (ic *remoteIteratorCreator) ExpandSources(sources influxql.Sources) (influxql.Sources, error) { - conn, err := ic.dialer.DialNode(ic.nodeID) - if err != nil { - return nil, err - } - defer conn.Close() - - // Write request. - if err := EncodeTLV(conn, expandSourcesRequestMessage, &ExpandSourcesRequest{ - ShardIDs: ic.shardIDs, - Sources: sources, - }); err != nil { - return nil, err - } - - // Read the response. - var resp ExpandSourcesResponse - if _, err := DecodeTLV(conn, &resp); err != nil { - return nil, err - } - return resp.Sources, resp.Err -} - -// NodeDialer dials connections to a given node. -type NodeDialer struct { - MetaClient MetaClient - Timeout time.Duration -} - -// DialNode returns a connection to a node. -func (d *NodeDialer) DialNode(nodeID uint64) (net.Conn, error) { - ni, err := d.MetaClient.DataNode(nodeID) - if err != nil { - return nil, err - } - - conn, err := net.Dial("tcp", ni.TCPHost) - if err != nil { - return nil, err - } - conn.SetDeadline(time.Now().Add(d.Timeout)) - - // Write the cluster multiplexing header byte - if _, err := conn.Write([]byte{MuxHeader}); err != nil { - conn.Close() - return nil, err - } - - return conn, nil -} - // TSDBStore is an interface for accessing the time series data store. 
type TSDBStore interface { CreateShard(database, policy string, shardID uint64) error diff --git a/cluster/query_executor_test.go b/cluster/query_executor_test.go index 6a171cad109..fc0223f9eab 100644 --- a/cluster/query_executor_test.go +++ b/cluster/query_executor_test.go @@ -76,75 +76,6 @@ func TestQueryExecutor_ExecuteQuery_SelectStatement(t *testing.T) { } } -// Ensure query executor can execute a distributed SELECT statement. -func TestQueryExecutor_ExecuteQuery_SelectStatement_Remote(t *testing.T) { - // Local executor. - e := DefaultQueryExecutor() - - // Start a second service. - s := MustOpenService() - defer s.Close() - - // Mock the remote service to create an iterator. - s.TSDBStore.ShardIteratorCreatorFn = func(shardID uint64) influxql.IteratorCreator { - if shardID != 200 { - t.Fatalf("unexpected remote shard id: %d", shardID) - } - - var ic IteratorCreator - ic.CreateIteratorFn = func(opt influxql.IteratorOptions) (influxql.Iterator, error) { - return &FloatIterator{Points: []influxql.FloatPoint{ - {Name: "cpu", Time: int64(0 * time.Second), Value: 20}, - }}, nil - } - return &ic - } - - // Two shards are returned. One local and one remote. - e.MetaClient.ShardsByTimeRangeFn = func(sources influxql.Sources, tmin, tmax time.Time) (a []meta.ShardInfo, err error) { - return []meta.ShardInfo{ - {ID: 100, Owners: []meta.ShardOwner{{NodeID: 0}}}, - {ID: 200, Owners: []meta.ShardOwner{{NodeID: 1}}}, - }, nil - } - - // The meta client should return node data for the remote node. - e.MetaClient.DataNodeFn = func(id uint64) (*meta.NodeInfo, error) { - return &meta.NodeInfo{ID: 1, TCPHost: s.Addr().String()}, nil - } - - // The local node should return a single iterator. - e.TSDBStore.ShardIteratorCreatorFn = func(id uint64) influxql.IteratorCreator { - if id != 100 { - t.Fatalf("unexpected shard id: %d", id) - } - - var ic IteratorCreator - ic.CreateIteratorFn = func(opt influxql.IteratorOptions) (influxql.Iterator, error) { - return &FloatIterator{Points: []influxql.FloatPoint{ - {Name: "cpu", Time: int64(0 * time.Second), Value: 10}, - }}, nil - } - return &ic - } - - // Verify all results from the query. - if a := ReadAllResults(e.ExecuteQuery(`SELECT count(value) FROM cpu`, "db0", 0)); !reflect.DeepEqual(a, []*influxql.Result{ - { - StatementID: 0, - Series: []*models.Row{{ - Name: "cpu", - Columns: []string{"time", "count"}, - Values: [][]interface{}{ - {time.Unix(0, 0).UTC(), float64(30)}, - }, - }}, - }, - }) { - t.Fatalf("unexpected results: %s", spew.Sdump(a)) - } -} - // QueryExecutor is a test wrapper for cluster.QueryExecutor. 
type QueryExecutor struct { *cluster.QueryExecutor diff --git a/cluster/rpc.go b/cluster/rpc.go index 814615af3cc..774804358f4 100644 --- a/cluster/rpc.go +++ b/cluster/rpc.go @@ -15,10 +15,9 @@ import ( // WritePointsRequest represents a request to write point data to the cluster type WritePointsRequest struct { - Database string - RetentionPolicy string - ConsistencyLevel ConsistencyLevel - Points []models.Point + Database string + RetentionPolicy string + Points []models.Point } // AddPoint adds a point to the WritePointRequest with field key 'value' diff --git a/cluster/service.go b/cluster/service.go index cedc5a76f1a..ec78c78f4f3 100644 --- a/cluster/service.go +++ b/cluster/service.go @@ -14,7 +14,6 @@ import ( "github.com/influxdata/influxdb" "github.com/influxdata/influxdb/influxql" - "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/tsdb" ) @@ -38,9 +37,23 @@ const ( seriesKeysReq = "seriesKeysReq" seriesKeysResp = "seriesKeysResp" +) + +const ( + writeShardRequestMessage byte = iota + 1 + writeShardResponseMessage + + executeStatementRequestMessage + executeStatementResponseMessage + + createIteratorRequestMessage + createIteratorResponseMessage - expandSourcesReq = "expandSourcesReq" - expandSourcesResp = "expandSourcesResp" + fieldDimensionsRequestMessage + fieldDimensionsResponseMessage + + seriesKeysRequestMessage + seriesKeysResponseMessage ) // Service processes data received over raw TCP connections. @@ -52,10 +65,6 @@ type Service struct { Listener net.Listener - MetaClient interface { - ShardOwner(shardID uint64) (string, string, *meta.ShardGroupInfo) - } - TSDBStore TSDBStore Logger *log.Logger @@ -199,10 +208,6 @@ func (s *Service) handleConn(conn net.Conn) { s.statMap.Add(seriesKeysReq, 1) s.processSeriesKeysRequest(conn) return - case expandSourcesRequestMessage: - s.statMap.Add(expandSourcesReq, 1) - s.processExpandSourcesRequest(conn) - return default: s.Logger.Printf("cluster service message type not found: %d", typ) } @@ -446,48 +451,6 @@ func (s *Service) processSeriesKeysRequest(conn net.Conn) { } } -func (s *Service) processExpandSourcesRequest(conn net.Conn) { - var sources influxql.Sources - if err := func() error { - // Parse request. - var req ExpandSourcesRequest - if err := DecodeLV(conn, &req); err != nil { - return err - } - - // Collect iterator creators for each shard. - ics := make([]influxql.IteratorCreator, 0, len(req.ShardIDs)) - for _, shardID := range req.ShardIDs { - ic := s.TSDBStore.ShardIteratorCreator(shardID) - if ic == nil { - return nil - } - ics = append(ics, ic) - } - - // Expand sources from all shards. - a, err := influxql.IteratorCreators(ics).ExpandSources(req.Sources) - if err != nil { - return err - } - sources = a - - return nil - }(); err != nil { - s.Logger.Printf("error reading ExpandSources request: %s", err) - EncodeTLV(conn, expandSourcesResponseMessage, &ExpandSourcesResponse{Err: err}) - return - } - - // Encode success response. - if err := EncodeTLV(conn, expandSourcesResponseMessage, &ExpandSourcesResponse{ - Sources: sources, - }); err != nil { - s.Logger.Printf("error writing ExpandSources response: %s", err) - return - } -} - // ReadTLV reads a type-length-value record from r. 
func ReadTLV(r io.Reader) (byte, []byte, error) { typ, err := ReadType(r) diff --git a/cluster/shard_writer.go b/cluster/shard_writer.go deleted file mode 100644 index 4a83f8b97d1..00000000000 --- a/cluster/shard_writer.go +++ /dev/null @@ -1,191 +0,0 @@ -package cluster - -import ( - "fmt" - "net" - "time" - - "github.com/influxdata/influxdb/models" - "github.com/influxdata/influxdb/services/meta" -) - -const ( - writeShardRequestMessage byte = iota + 1 - writeShardResponseMessage - - executeStatementRequestMessage - executeStatementResponseMessage - - createIteratorRequestMessage - createIteratorResponseMessage - - fieldDimensionsRequestMessage - fieldDimensionsResponseMessage - - seriesKeysRequestMessage - seriesKeysResponseMessage - - expandSourcesRequestMessage - expandSourcesResponseMessage -) - -// ShardWriter writes a set of points to a shard. -type ShardWriter struct { - pool *clientPool - timeout time.Duration - maxConnections int - - MetaClient interface { - DataNode(id uint64) (ni *meta.NodeInfo, err error) - ShardOwner(shardID uint64) (database, policy string, sgi *meta.ShardGroupInfo) - } -} - -// NewShardWriter returns a new instance of ShardWriter. -func NewShardWriter(timeout time.Duration, maxConnections int) *ShardWriter { - return &ShardWriter{ - pool: newClientPool(), - timeout: timeout, - maxConnections: maxConnections, - } -} - -// WriteShard writes time series points to a shard -func (w *ShardWriter) WriteShard(shardID, ownerID uint64, points []models.Point) error { - c, err := w.dial(ownerID) - if err != nil { - return err - } - - conn, ok := c.(*pooledConn) - if !ok { - panic("wrong connection type") - } - defer func(conn net.Conn) { - conn.Close() // return to pool - }(conn) - - // Determine the location of this shard and whether it still exists - db, rp, sgi := w.MetaClient.ShardOwner(shardID) - if sgi == nil { - // If we can't get the shard group for this shard, then we need to drop this request - // as it is no longer valid. This could happen if writes were queued via - // hinted handoff and we're processing the queue after a shard group was deleted. - return nil - } - - // Build write request. - var request WriteShardRequest - request.SetShardID(shardID) - request.SetDatabase(db) - request.SetRetentionPolicy(rp) - request.AddPoints(points) - - // Marshal into protocol buffers. - buf, err := request.MarshalBinary() - if err != nil { - return err - } - - // Write request. - conn.SetWriteDeadline(time.Now().Add(w.timeout)) - if err := WriteTLV(conn, writeShardRequestMessage, buf); err != nil { - conn.MarkUnusable() - return err - } - - // Read the response. - conn.SetReadDeadline(time.Now().Add(w.timeout)) - _, buf, err = ReadTLV(conn) - if err != nil { - conn.MarkUnusable() - return err - } - - // Unmarshal response. 
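// Illustrative sketch (not part of the original patch): the deleted
// ShardWriter.WriteShard above frames each request and response as a type
// byte, a length, and a payload ("TLV") via WriteTLV/ReadTLV, with the
// message-type constants now declared in cluster/service.go. The standalone
// program below shows that framing over an in-memory buffer; writeTLV,
// readTLV, and the big-endian uint32 length field are assumptions for the
// sketch and may differ from the cluster package's exact wire format.
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// writeShardRequestMessage mirrors the iota-based constants in service.go.
const writeShardRequestMessage byte = 1

// writeTLV writes a type byte, a 4-byte big-endian length, then the payload.
func writeTLV(w io.Writer, typ byte, payload []byte) error {
	if _, err := w.Write([]byte{typ}); err != nil {
		return err
	}
	if err := binary.Write(w, binary.BigEndian, uint32(len(payload))); err != nil {
		return err
	}
	_, err := w.Write(payload)
	return err
}

// readTLV reads back one record written by writeTLV.
func readTLV(r io.Reader) (byte, []byte, error) {
	var typ [1]byte
	if _, err := io.ReadFull(r, typ[:]); err != nil {
		return 0, nil, err
	}
	var n uint32
	if err := binary.Read(r, binary.BigEndian, &n); err != nil {
		return 0, nil, err
	}
	buf := make([]byte, n)
	if _, err := io.ReadFull(r, buf); err != nil {
		return 0, nil, err
	}
	return typ[0], buf, nil
}

func main() {
	var conn bytes.Buffer // stands in for a pooled net.Conn
	if err := writeTLV(&conn, writeShardRequestMessage, []byte("marshaled WriteShardRequest")); err != nil {
		panic(err)
	}
	typ, body, err := readTLV(&conn)
	if err != nil {
		panic(err)
	}
	fmt.Printf("type=%d payload=%q\n", typ, body)
}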
- var response WriteShardResponse - if err := response.UnmarshalBinary(buf); err != nil { - return err - } - - if response.Code() != 0 { - return fmt.Errorf("error code %d: %s", response.Code(), response.Message()) - } - - return nil -} - -func (w *ShardWriter) dial(nodeID uint64) (net.Conn, error) { - // If we don't have a connection pool for that addr yet, create one - _, ok := w.pool.getPool(nodeID) - if !ok { - factory := &connFactory{nodeID: nodeID, clientPool: w.pool, timeout: w.timeout} - factory.metaClient = w.MetaClient - - p, err := NewBoundedPool(1, w.maxConnections, w.timeout, factory.dial) - if err != nil { - return nil, err - } - w.pool.setPool(nodeID, p) - } - return w.pool.conn(nodeID) -} - -// Close closes ShardWriter's pool -func (w *ShardWriter) Close() error { - if w.pool == nil { - return fmt.Errorf("client already closed") - } - w.pool.close() - w.pool = nil - return nil -} - -const ( - maxConnections = 500 - maxRetries = 3 -) - -var errMaxConnectionsExceeded = fmt.Errorf("can not exceed max connections of %d", maxConnections) - -type connFactory struct { - nodeID uint64 - timeout time.Duration - - clientPool interface { - size() int - } - - metaClient interface { - DataNode(id uint64) (ni *meta.NodeInfo, err error) - } -} - -func (c *connFactory) dial() (net.Conn, error) { - if c.clientPool.size() > maxConnections { - return nil, errMaxConnectionsExceeded - } - - ni, err := c.metaClient.DataNode(c.nodeID) - if err != nil { - return nil, err - } - - if ni == nil { - return nil, fmt.Errorf("node %d does not exist", c.nodeID) - } - - conn, err := net.DialTimeout("tcp", ni.TCPHost, c.timeout) - if err != nil { - return nil, err - } - - // Write a marker byte for cluster messages. - _, err = conn.Write([]byte{MuxHeader}) - if err != nil { - conn.Close() - return nil, err - } - - return conn, nil -} diff --git a/cluster/shard_writer_test.go b/cluster/shard_writer_test.go deleted file mode 100644 index ffbcbebaa1f..00000000000 --- a/cluster/shard_writer_test.go +++ /dev/null @@ -1,224 +0,0 @@ -package cluster_test - -import ( - "net" - "strings" - "testing" - "time" - - "github.com/influxdata/influxdb/cluster" - "github.com/influxdata/influxdb/models" - "github.com/influxdata/influxdb/toml" -) - -// Ensure the shard writer can successfully write a single request. -func TestShardWriter_WriteShard_Success(t *testing.T) { - ts := newTestWriteService(nil) - ts.TSDBStore.WriteToShardFn = ts.writeShardSuccess - s := cluster.NewService(cluster.Config{}) - s.Listener = ts.muxln - s.TSDBStore = &ts.TSDBStore - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - defer ts.Close() - - w := cluster.NewShardWriter(time.Minute, 1) - w.MetaClient = &metaClient{host: ts.ln.Addr().String()} - - // Build a single point. - now := time.Now() - var points []models.Point - points = append(points, models.MustNewPoint("cpu", models.Tags{"host": "server01"}, map[string]interface{}{"value": int64(100)}, now)) - - // Write to shard and close. - if err := w.WriteShard(1, 2, points); err != nil { - t.Fatal(err) - } else if err := w.Close(); err != nil { - t.Fatal(err) - } - - // Validate response. - responses, err := ts.ResponseN(1) - if err != nil { - t.Fatal(err) - } else if responses[0].shardID != 1 { - t.Fatalf("unexpected shard id: %d", responses[0].shardID) - } - - // Validate point. 
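// Illustrative sketch (not part of the original patch): the connFactory
// removed above caps the number of pooled connections, resolves the target
// data node, dials with a timeout, and writes a single multiplexing header
// byte so the remote tcp.Mux can route the stream to the cluster service.
// The standalone program below reproduces that dial handshake against an
// in-process listener; muxHeader's value, maxConnections, and dialNode are
// assumptions standing in for the cluster package's internals.
package main

import (
	"errors"
	"fmt"
	"net"
	"time"
)

const (
	muxHeader      byte = 2 // assumed value for the sketch
	maxConnections      = 500
)

var errMaxConnectionsExceeded = errors.New("can not exceed max connections")

// dialNode dials addr, refusing to exceed the connection cap and writing the
// header byte before any TLV traffic is exchanged.
func dialNode(addr string, open int, timeout time.Duration) (net.Conn, error) {
	if open >= maxConnections {
		return nil, errMaxConnectionsExceeded
	}
	conn, err := net.DialTimeout("tcp", addr, timeout)
	if err != nil {
		return nil, err
	}
	if _, err := conn.Write([]byte{muxHeader}); err != nil {
		conn.Close()
		return nil, err
	}
	return conn, nil
}

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	defer ln.Close()

	done := make(chan struct{})
	go func() {
		defer close(done)
		c, err := ln.Accept()
		if err != nil {
			return
		}
		defer c.Close()
		buf := make([]byte, 1)
		if _, err := c.Read(buf); err == nil {
			fmt.Println("server saw header byte:", buf[0])
		}
	}()

	conn, err := dialNode(ln.Addr().String(), 0, time.Second)
	if err != nil {
		panic(err)
	}
	defer conn.Close()
	<-done
}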
- if p := responses[0].points[0]; p.Name() != "cpu" { - t.Fatalf("unexpected name: %s", p.Name()) - } else if p.Fields()["value"] != int64(100) { - t.Fatalf("unexpected 'value' field: %d", p.Fields()["value"]) - } else if p.Tags()["host"] != "server01" { - t.Fatalf("unexpected 'host' tag: %s", p.Tags()["host"]) - } else if p.Time().UnixNano() != now.UnixNano() { - t.Fatalf("unexpected time: %s", p.Time()) - } -} - -// Ensure the shard writer can successful write a multiple requests. -func TestShardWriter_WriteShard_Multiple(t *testing.T) { - ts := newTestWriteService(nil) - ts.TSDBStore.WriteToShardFn = ts.writeShardSuccess - s := cluster.NewService(cluster.Config{}) - s.Listener = ts.muxln - s.TSDBStore = &ts.TSDBStore - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - defer ts.Close() - - w := cluster.NewShardWriter(time.Minute, 1) - w.MetaClient = &metaClient{host: ts.ln.Addr().String()} - - // Build a single point. - now := time.Now() - var points []models.Point - points = append(points, models.MustNewPoint("cpu", models.Tags{"host": "server01"}, map[string]interface{}{"value": int64(100)}, now)) - - // Write to shard twice and close. - if err := w.WriteShard(1, 2, points); err != nil { - t.Fatal(err) - } else if err := w.WriteShard(1, 2, points); err != nil { - t.Fatal(err) - } else if err := w.Close(); err != nil { - t.Fatal(err) - } - - // Validate response. - responses, err := ts.ResponseN(1) - if err != nil { - t.Fatal(err) - } else if responses[0].shardID != 1 { - t.Fatalf("unexpected shard id: %d", responses[0].shardID) - } - - // Validate point. - if p := responses[0].points[0]; p.Name() != "cpu" { - t.Fatalf("unexpected name: %s", p.Name()) - } else if p.Fields()["value"] != int64(100) { - t.Fatalf("unexpected 'value' field: %d", p.Fields()["value"]) - } else if p.Tags()["host"] != "server01" { - t.Fatalf("unexpected 'host' tag: %s", p.Tags()["host"]) - } else if p.Time().UnixNano() != now.UnixNano() { - t.Fatalf("unexpected time: %s", p.Time()) - } -} - -// Ensure the shard writer returns an error when the server fails to accept the write. -func TestShardWriter_WriteShard_Error(t *testing.T) { - ts := newTestWriteService(writeShardFail) - s := cluster.NewService(cluster.Config{}) - s.Listener = ts.muxln - s.TSDBStore = &ts.TSDBStore - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - defer ts.Close() - - w := cluster.NewShardWriter(time.Minute, 1) - w.MetaClient = &metaClient{host: ts.ln.Addr().String()} - now := time.Now() - - shardID := uint64(1) - ownerID := uint64(2) - var points []models.Point - points = append(points, models.MustNewPoint( - "cpu", models.Tags{"host": "server01"}, map[string]interface{}{"value": int64(100)}, now, - )) - - if err := w.WriteShard(shardID, ownerID, points); err == nil || err.Error() != "error code 1: write shard 1: failed to write" { - t.Fatalf("unexpected error: %v", err) - } -} - -// Ensure the shard writer returns an error when dialing times out. -func TestShardWriter_Write_ErrDialTimeout(t *testing.T) { - ts := newTestWriteService(nil) - ts.TSDBStore.WriteToShardFn = ts.writeShardSuccess - s := cluster.NewService(cluster.Config{}) - s.Listener = ts.muxln - s.TSDBStore = &ts.TSDBStore - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - defer ts.Close() - - // Zero timeout set to support all platforms. 
- w := cluster.NewShardWriter(0, 1) - w.MetaClient = &metaClient{host: ts.ln.Addr().String()} - now := time.Now() - - shardID := uint64(1) - ownerID := uint64(2) - var points []models.Point - - points = append(points, models.MustNewPoint( - "cpu", models.Tags{"host": "server01"}, map[string]interface{}{"value": int64(100)}, now, - )) - - if err, exp := w.WriteShard(shardID, ownerID, points), "i/o timeout"; err == nil || !strings.Contains(err.Error(), exp) { - t.Fatalf("expected error %v, to contain %s", err, exp) - } -} - -// Ensure the shard writer returns an error when reading times out. -func TestShardWriter_Write_ErrReadTimeout(t *testing.T) { - ln, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - - w := cluster.NewShardWriter(time.Millisecond, 1) - w.MetaClient = &metaClient{host: ln.Addr().String()} - now := time.Now() - - shardID := uint64(1) - ownerID := uint64(2) - var points []models.Point - points = append(points, models.MustNewPoint( - "cpu", models.Tags{"host": "server01"}, map[string]interface{}{"value": int64(100)}, now, - )) - - if err := w.WriteShard(shardID, ownerID, points); err == nil || !strings.Contains(err.Error(), "i/o timeout") { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure the shard writer returns an error when we can't get a connection. -func TestShardWriter_Write_PoolMax(t *testing.T) { - ts := newTestWriteService(writeShardSlow) - s := cluster.NewService(cluster.Config{ - ShardWriterTimeout: toml.Duration(100 * time.Millisecond), - }) - s.Listener = ts.muxln - s.TSDBStore = &ts.TSDBStore - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - defer ts.Close() - - w := cluster.NewShardWriter(100*time.Millisecond, 1) - w.MetaClient = &metaClient{host: ts.ln.Addr().String()} - now := time.Now() - - shardID := uint64(1) - ownerID := uint64(2) - var points []models.Point - points = append(points, models.MustNewPoint( - "cpu", models.Tags{"host": "server01"}, map[string]interface{}{"value": int64(100)}, now, - )) - - go w.WriteShard(shardID, ownerID, points) - time.Sleep(time.Millisecond) - if err := w.WriteShard(shardID, ownerID, points); err == nil || err.Error() != "timed out waiting for free connection" { - t.Fatalf("unexpected error: %v", err) - } -} diff --git a/cmd/influx/cli/cli.go b/cmd/influx/cli/cli.go index f6929bcf640..53499a71273 100644 --- a/cmd/influx/cli/cli.go +++ b/cmd/influx/cli/cli.go @@ -20,8 +20,8 @@ import ( "text/tabwriter" "github.com/influxdata/influxdb/client" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/importer/v8" + "github.com/influxdata/influxdb/models" "github.com/peterh/liner" ) @@ -409,7 +409,7 @@ func (c *CommandLine) SetWriteConsistency(cmd string) { // normalize cmd cmd = strings.ToLower(cmd) - _, err := cluster.ParseConsistencyLevel(cmd) + _, err := models.ParseConsistencyLevel(cmd) if err != nil { fmt.Printf("Unknown consistency level %q. Please use any, one, quorum, or all.\n", cmd) return diff --git a/cmd/influxd/restore/restore.go b/cmd/influxd/restore/restore.go index 97e37cf99be..c50c61cdbb9 100644 --- a/cmd/influxd/restore/restore.go +++ b/cmd/influxd/restore/restore.go @@ -52,11 +52,6 @@ func (cmd *Command) Run(args ...string) error { return err } - if err := cmd.ensureStopped(); err != nil { - fmt.Fprintln(cmd.Stderr, "influxd cannot be running during a restore. 
Please stop any running instances and try again.") - return err - } - if cmd.metadir != "" { if err := cmd.unpackMeta(); err != nil { return err @@ -119,15 +114,6 @@ func (cmd *Command) parseFlags(args []string) error { return nil } -func (cmd *Command) ensureStopped() error { - ln, err := net.Listen("tcp", cmd.MetaConfig.BindAddress) - if err != nil { - return fmt.Errorf("influxd running on %s: aborting", cmd.MetaConfig.BindAddress) - } - defer ln.Close() - return nil -} - // unpackMeta reads the metadata from the backup directory and initializes a raft // cluster and replaces the root metadata. func (cmd *Command) unpackMeta() error { @@ -184,7 +170,6 @@ func (cmd *Command) unpackMeta() error { // Copy meta config and remove peers so it starts in single mode. c := cmd.MetaConfig - c.JoinPeers = nil c.LoggingEnabled = false // Create the meta dir @@ -197,26 +182,7 @@ func (cmd *Command) unpackMeta() error { return err } - // Initialize meta store. - store := meta.NewService(c) - store.RaftListener = newNopListener() - - // Open the meta store. - if err := store.Open(); err != nil { - return fmt.Errorf("open store: %s", err) - } - defer store.Close() - - // Wait for the store to be ready or error. - select { - case err := <-store.Err(): - return err - default: - } - - client := meta.NewClient() - client.SetMetaServers([]string{store.HTTPAddr()}) - client.SetTLS(false) + client := meta.NewClient(c) client.SetLogger(log.New(ioutil.Discard, "", 0)) if err := client.Open(); err != nil { return err diff --git a/cmd/influxd/run/backup_restore_test.go b/cmd/influxd/run/backup_restore_test.go index 8ebc56d7eb3..58f34bca864 100644 --- a/cmd/influxd/run/backup_restore_test.go +++ b/cmd/influxd/run/backup_restore_test.go @@ -11,7 +11,6 @@ import ( "github.com/influxdata/influxdb/cmd/influxd/backup" "github.com/influxdata/influxdb/cmd/influxd/restore" "github.com/influxdata/influxdb/cmd/influxd/run" - "github.com/influxdata/influxdb/services/meta" ) func TestServer_BackupAndRestore(t *testing.T) { @@ -19,8 +18,7 @@ func TestServer_BackupAndRestore(t *testing.T) { config.Data.Engine = "tsm1" config.Data.Dir, _ = ioutil.TempDir("", "data_backup") config.Meta.Dir, _ = ioutil.TempDir("", "meta_backup") - config.Meta.BindAddress = freePort() - config.Meta.HTTPBindAddress = freePort() + config.BindAddress = freePort() backupDir, _ := ioutil.TempDir("", "backup") defer os.RemoveAll(backupDir) @@ -33,7 +31,7 @@ func TestServer_BackupAndRestore(t *testing.T) { config.Data.CacheSnapshotMemorySize = 1 func() { - s := OpenServer(config, "") + s := OpenServer(config) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy(db, newRetentionPolicyInfo(rp, 1, 0)); err != nil { @@ -43,7 +41,9 @@ func TestServer_BackupAndRestore(t *testing.T) { t.Fatal(err) } - s.MustWrite(db, rp, "myseries,host=A value=23 1000000", nil) + if _, err := s.Write(db, rp, "myseries,host=A value=23 1000000", nil); err != nil { + t.Fatalf("failed to write: %s", err) + } // wait for the snapshot to write time.Sleep(time.Second) @@ -58,9 +58,9 @@ func TestServer_BackupAndRestore(t *testing.T) { // now backup cmd := backup.NewCommand() - hostAddress, _ := meta.DefaultHost(run.DefaultHostname, config.Meta.BindAddress) + hostAddress, _ := run.DefaultHost(run.DefaultHostname, config.BindAddress) if err := cmd.Run("-host", hostAddress, "-database", "mydb", backupDir); err != nil { - t.Fatalf("error backing up: %s", err.Error()) + t.Fatalf("error backing up: %s, hostAddress: %s", err.Error(), hostAddress) } }() @@ -74,7 +74,6 @@ func 
TestServer_BackupAndRestore(t *testing.T) { // restore cmd := restore.NewCommand() - cmd.MetaConfig.BindAddress = config.Meta.BindAddress if err := cmd.Run("-metadir", config.Meta.Dir, "-datadir", config.Data.Dir, "-database", "mydb", backupDir); err != nil { t.Fatalf("error restoring: %s", err.Error()) @@ -87,7 +86,7 @@ func TestServer_BackupAndRestore(t *testing.T) { } // now open it up and verify we're good - s := OpenServer(config, "") + s := OpenServer(config) defer s.Close() res, err := s.Query(`select * from "mydb"."forever"."myseries"`) diff --git a/cmd/influxd/run/command.go b/cmd/influxd/run/command.go index c7e4efd24ed..76d499d1601 100644 --- a/cmd/influxd/run/command.go +++ b/cmd/influxd/run/command.go @@ -10,7 +10,6 @@ import ( "path/filepath" "runtime" "strconv" - "strings" "time" "github.com/BurntSushi/toml" @@ -98,24 +97,10 @@ func (cmd *Command) Run(args ...string) error { return fmt.Errorf("apply env config: %v", err) } - // Propogate the top-level join options down to the meta config - if config.Join != "" { - config.Meta.JoinPeers = strings.Split(config.Join, ",") - } - - // Command-line flags for -join and -hostname override the config - // and env variable - if options.Join != "" { - config.Meta.JoinPeers = strings.Split(options.Join, ",") - } - if options.Hostname != "" { config.Hostname = options.Hostname } - // Propogate the top-level hostname down to dependendent configs - config.Meta.RemoteHostname = config.Hostname - // Validate the configuration. if err := config.Validate(); err != nil { return fmt.Errorf("%s. To generate a valid configuration file run `influxd config > influxdb.generated.conf`", err) @@ -173,7 +158,6 @@ func (cmd *Command) ParseFlags(args ...string) (Options, error) { fs := flag.NewFlagSet("", flag.ContinueOnError) fs.StringVar(&options.ConfigPath, "config", "", "") fs.StringVar(&options.PIDFile, "pidfile", "", "") - fs.StringVar(&options.Join, "join", "", "") fs.StringVar(&options.Hostname, "hostname", "", "") fs.StringVar(&options.CPUProfile, "cpuprofile", "", "") fs.StringVar(&options.MemProfile, "memprofile", "", "") @@ -233,10 +217,6 @@ then a new cluster will be initialized unless the -join argument is used. -config Set the path to the configuration file. - -join - Joins the server to an existing cluster. Should be - the HTTP bind address of an existing meta server - -hostname Override the hostname, the 'hostname' configuration option will be overridden. @@ -255,7 +235,6 @@ then a new cluster will be initialized unless the -join argument is used. 
type Options struct { ConfigPath string PIDFile string - Join string Hostname string CPUProfile string MemProfile string diff --git a/cmd/influxd/run/config.go b/cmd/influxd/run/config.go index bbfa18c742d..5d7e9123809 100644 --- a/cmd/influxd/run/config.go +++ b/cmd/influxd/run/config.go @@ -1,7 +1,6 @@ package run import ( - "errors" "fmt" "os" "os/user" @@ -17,7 +16,6 @@ import ( "github.com/influxdata/influxdb/services/collectd" "github.com/influxdata/influxdb/services/continuous_querier" "github.com/influxdata/influxdb/services/graphite" - "github.com/influxdata/influxdb/services/hh" "github.com/influxdata/influxdb/services/httpd" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/services/opentsdb" @@ -55,7 +53,6 @@ type Config struct { UDPs []udp.Config `toml:"udp"` ContinuousQuery continuous_querier.Config `toml:"continuous_queries"` - HintedHandoff hh.Config `toml:"hinted-handoff"` // Server reporting ReportingDisabled bool `toml:"reporting-disabled"` @@ -87,7 +84,6 @@ func NewConfig() *Config { c.ContinuousQuery = continuous_querier.NewConfig() c.Retention = retention.NewConfig() - c.HintedHandoff = hh.NewConfig() c.BindAddress = DefaultBindAddress // All ARRAY attributes have to be init after toml decode @@ -128,10 +124,8 @@ func NewDemoConfig() (*Config, error) { c.Meta.Dir = filepath.Join(homeDir, ".influxdb/meta") c.Data.Dir = filepath.Join(homeDir, ".influxdb/data") - c.HintedHandoff.Dir = filepath.Join(homeDir, ".influxdb/hh") c.Data.WALDir = filepath.Join(homeDir, ".influxdb/wal") - c.HintedHandoff.Enabled = true c.Admin.Enabled = true return c, nil @@ -139,34 +133,23 @@ func NewDemoConfig() (*Config, error) { // Validate returns an error if the config is invalid. func (c *Config) Validate() error { - if !c.Meta.Enabled && !c.Data.Enabled { - return errors.New("either Meta, Data, or both must be enabled") + if err := c.Meta.Validate(); err != nil { + return err } - if c.Meta.Enabled { - if err := c.Meta.Validate(); err != nil { - return err - } - - // If the config is for a meta-only node, we can't store monitor stats - // locally. - if c.Monitor.StoreEnabled && !c.Data.Enabled { - return fmt.Errorf("monitor storage can not be enabled on meta only nodes") - } + // If the config is for a meta-only node, we can't store monitor stats + // locally. 
+ if c.Monitor.StoreEnabled { + return fmt.Errorf("monitor storage can not be enabled on meta only nodes") } - if c.Data.Enabled { - if err := c.Data.Validate(); err != nil { - return err - } + if err := c.Data.Validate(); err != nil { + return err + } - if err := c.HintedHandoff.Validate(); err != nil { - return err - } - for _, g := range c.Graphites { - if err := g.Validate(); err != nil { - return fmt.Errorf("invalid graphite config: %v", err) - } + for _, g := range c.Graphites { + if err := g.Validate(); err != nil { + return fmt.Errorf("invalid graphite config: %v", err) } } diff --git a/cmd/influxd/run/server.go b/cmd/influxd/run/server.go index 1d69c0c3258..56d1bd411f5 100644 --- a/cmd/influxd/run/server.go +++ b/cmd/influxd/run/server.go @@ -2,12 +2,14 @@ package run import ( "fmt" + "io/ioutil" "log" "net" "os" "path/filepath" "runtime" "runtime/pprof" + "strings" "time" "github.com/influxdata/influxdb" @@ -19,7 +21,6 @@ import ( "github.com/influxdata/influxdb/services/continuous_querier" "github.com/influxdata/influxdb/services/copier" "github.com/influxdata/influxdb/services/graphite" - "github.com/influxdata/influxdb/services/hh" "github.com/influxdata/influxdb/services/httpd" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/services/opentsdb" @@ -61,16 +62,11 @@ type Server struct { BindAddress string Listener net.Listener - Node *influxdb.Node - - MetaClient *meta.Client - MetaService *meta.Service + MetaClient *meta.Client TSDBStore *tsdb.Store QueryExecutor *cluster.QueryExecutor PointsWriter *cluster.PointsWriter - ShardWriter *cluster.ShardWriter - HintedHandoff *hh.Service Subscriber *subscriber.Service Services []Service @@ -89,12 +85,6 @@ type Server struct { CPUProfile string MemProfile string - // joinPeers are the metaservers specified at run time to join this server to - joinPeers []string - - // metaUseTLS specifies if we should use a TLS connection to the meta servers - metaUseTLS bool - // httpAPIAddr is the host:port combination for the main HTTP API for querying and writing data httpAPIAddr string @@ -137,16 +127,17 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) { node = influxdb.NewNode(c.Meta.Dir) } + // In 0.11 we removed MetaServers from node.json. To avoid confusion for + // existing users, force a re-save of the node.json file to remove that property + // if it happens to exist. + nodeContents, err := ioutil.ReadFile(filepath.Join(c.Meta.Dir, "node.json")) + if err == nil && strings.Contains(string(nodeContents), "MetaServers") { + node.Save() + } + // In 0.10.0 bind-address got moved to the top level. 
Check // The old location to keep things backwards compatible bind := c.BindAddress - if c.Meta.BindAddress != "" { - bind = c.Meta.BindAddress - } - - if !c.Data.Enabled && !c.Meta.Enabled { - return nil, fmt.Errorf("must run as either meta node or data node or both") - } s := &Server{ buildInfo: *buildInfo, @@ -155,14 +146,11 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) { BindAddress: bind, - Node: node, - MetaClient: meta.NewClient(), + MetaClient: meta.NewClient(c.Meta), Monitor: monitor.New(c.Monitor), reportingDisabled: c.ReportingDisabled, - joinPeers: c.Meta.JoinPeers, - metaUseTLS: c.Meta.HTTPSEnabled, httpAPIAddr: c.HTTPD.BindAddress, httpUseTLS: c.HTTPD.HTTPSEnabled, @@ -171,73 +159,51 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) { config: c, } - if c.Meta.Enabled { - s.MetaService = meta.NewService(c.Meta) - s.MetaService.Version = s.buildInfo.Version - s.MetaService.Node = s.Node - } - - if c.Data.Enabled { - s.TSDBStore = tsdb.NewStore(c.Data.Dir) - s.TSDBStore.EngineOptions.Config = c.Data - - // Copy TSDB configuration. - s.TSDBStore.EngineOptions.EngineVersion = c.Data.Engine - s.TSDBStore.EngineOptions.MaxWALSize = c.Data.MaxWALSize - s.TSDBStore.EngineOptions.WALFlushInterval = time.Duration(c.Data.WALFlushInterval) - s.TSDBStore.EngineOptions.WALPartitionFlushDelay = time.Duration(c.Data.WALPartitionFlushDelay) - - // Set the shard writer - s.ShardWriter = cluster.NewShardWriter(time.Duration(c.Cluster.ShardWriterTimeout), - c.Cluster.MaxRemoteWriteConnections) - - // Create the hinted handoff service - s.HintedHandoff = hh.NewService(c.HintedHandoff, s.ShardWriter, s.MetaClient) - s.HintedHandoff.Monitor = s.Monitor - - // Create the Subscriber service - s.Subscriber = subscriber.NewService(c.Subscriber) - - // Initialize points writer. - s.PointsWriter = cluster.NewPointsWriter() - s.PointsWriter.WriteTimeout = time.Duration(c.Cluster.WriteTimeout) - s.PointsWriter.TSDBStore = s.TSDBStore - s.PointsWriter.ShardWriter = s.ShardWriter - s.PointsWriter.HintedHandoff = s.HintedHandoff - s.PointsWriter.Subscriber = s.Subscriber - s.PointsWriter.Node = s.Node - - // Initialize meta executor. - metaExecutor := cluster.NewMetaExecutor() - metaExecutor.MetaClient = s.MetaClient - metaExecutor.Node = s.Node - - // Initialize query executor. - s.QueryExecutor = cluster.NewQueryExecutor() - s.QueryExecutor.MetaClient = s.MetaClient - s.QueryExecutor.TSDBStore = s.TSDBStore - s.QueryExecutor.Monitor = s.Monitor - s.QueryExecutor.PointsWriter = s.PointsWriter - s.QueryExecutor.MetaExecutor = metaExecutor - if c.Data.QueryLogEnabled { - s.QueryExecutor.LogOutput = os.Stderr - } + if err := s.MetaClient.Open(); err != nil { + return nil, err + } - // Initialize the monitor - s.Monitor.Version = s.buildInfo.Version - s.Monitor.Commit = s.buildInfo.Commit - s.Monitor.Branch = s.buildInfo.Branch - s.Monitor.BuildTime = s.buildInfo.Time - s.Monitor.PointsWriter = (*monitorPointsWriter)(s.PointsWriter) + s.TSDBStore = tsdb.NewStore(c.Data.Dir) + s.TSDBStore.EngineOptions.Config = c.Data + + // Copy TSDB configuration. + s.TSDBStore.EngineOptions.EngineVersion = c.Data.Engine + s.TSDBStore.EngineOptions.MaxWALSize = c.Data.MaxWALSize + s.TSDBStore.EngineOptions.WALFlushInterval = time.Duration(c.Data.WALFlushInterval) + s.TSDBStore.EngineOptions.WALPartitionFlushDelay = time.Duration(c.Data.WALPartitionFlushDelay) + + // Create the Subscriber service + s.Subscriber = subscriber.NewService(c.Subscriber) + + // Initialize points writer. 
+ s.PointsWriter = cluster.NewPointsWriter() + s.PointsWriter.WriteTimeout = time.Duration(c.Cluster.WriteTimeout) + s.PointsWriter.TSDBStore = s.TSDBStore + s.PointsWriter.Subscriber = s.Subscriber + + // Initialize query executor. + s.QueryExecutor = cluster.NewQueryExecutor() + s.QueryExecutor.MetaClient = s.MetaClient + s.QueryExecutor.TSDBStore = s.TSDBStore + s.QueryExecutor.Monitor = s.Monitor + s.QueryExecutor.PointsWriter = s.PointsWriter + if c.Data.QueryLogEnabled { + s.QueryExecutor.LogOutput = os.Stderr } + // Initialize the monitor + s.Monitor.Version = s.buildInfo.Version + s.Monitor.Commit = s.buildInfo.Commit + s.Monitor.Branch = s.buildInfo.Branch + s.Monitor.BuildTime = s.buildInfo.Time + s.Monitor.PointsWriter = (*monitorPointsWriter)(s.PointsWriter) + return s, nil } func (s *Server) appendClusterService(c cluster.Config) { srv := cluster.NewService(c) srv.TSDBStore = s.TSDBStore - srv.MetaClient = s.MetaClient s.Services = append(s.Services, srv) s.ClusterService = srv } @@ -246,7 +212,6 @@ func (s *Server) appendSnapshotterService() { srv := snapshotter.NewService() srv.TSDBStore = s.TSDBStore srv.MetaClient = s.MetaClient - srv.Node = s.Node s.Services = append(s.Services, srv) s.SnapshotterService = srv } @@ -391,90 +356,60 @@ func (s *Server) Open() error { mux := tcp.NewMux() go mux.Serve(ln) - if s.MetaService != nil { - s.MetaService.RaftListener = mux.Listen(meta.MuxHeader) - // Open meta service. - if err := s.MetaService.Open(); err != nil { - return fmt.Errorf("open meta service: %s", err) - } - go s.monitorErrorChan(s.MetaService.Err()) - } - - // initialize MetaClient. - if err = s.initializeMetaClient(); err != nil { + // Append services. + s.appendClusterService(s.config.Cluster) + s.appendPrecreatorService(s.config.Precreator) + s.appendSnapshotterService() + s.appendCopierService() + s.appendAdminService(s.config.Admin) + s.appendContinuousQueryService(s.config.ContinuousQuery) + s.appendHTTPDService(s.config.HTTPD) + s.appendCollectdService(s.config.Collectd) + if err := s.appendOpenTSDBService(s.config.OpenTSDB); err != nil { return err } - - if s.TSDBStore != nil { - // Append services. - s.appendClusterService(s.config.Cluster) - s.appendPrecreatorService(s.config.Precreator) - s.appendSnapshotterService() - s.appendCopierService() - s.appendAdminService(s.config.Admin) - s.appendContinuousQueryService(s.config.ContinuousQuery) - s.appendHTTPDService(s.config.HTTPD) - s.appendCollectdService(s.config.Collectd) - if err := s.appendOpenTSDBService(s.config.OpenTSDB); err != nil { + for _, g := range s.config.UDPs { + s.appendUDPService(g) + } + s.appendRetentionPolicyService(s.config.Retention) + for _, g := range s.config.Graphites { + if err := s.appendGraphiteService(g); err != nil { return err } - for _, g := range s.config.UDPs { - s.appendUDPService(g) - } - s.appendRetentionPolicyService(s.config.Retention) - for _, g := range s.config.Graphites { - if err := s.appendGraphiteService(g); err != nil { - return err - } - } + } - s.QueryExecutor.Node = s.Node - - s.Subscriber.MetaClient = s.MetaClient - s.ShardWriter.MetaClient = s.MetaClient - s.HintedHandoff.MetaClient = s.MetaClient - s.Subscriber.MetaClient = s.MetaClient - s.PointsWriter.MetaClient = s.MetaClient - s.Monitor.MetaClient = s.MetaClient - - s.ClusterService.Listener = mux.Listen(cluster.MuxHeader) - s.SnapshotterService.Listener = mux.Listen(snapshotter.MuxHeader) - s.CopierService.Listener = mux.Listen(copier.MuxHeader) - - // Open TSDB store. 
- if err := s.TSDBStore.Open(); err != nil { - // Provide helpful error if user needs to upgrade shards to - // tsm1. - if serr, ok := err.(tsdb.ShardError); ok && serr.Err == tsdb.ErrUnknownEngineFormat { - return influxdb.ErrUpgradeEngine - } - return fmt.Errorf("open tsdb store: %s", err) - } + s.Subscriber.MetaClient = s.MetaClient + s.Subscriber.MetaClient = s.MetaClient + s.PointsWriter.MetaClient = s.MetaClient + s.Monitor.MetaClient = s.MetaClient - // Open the hinted handoff service - if err := s.HintedHandoff.Open(); err != nil { - return fmt.Errorf("open hinted handoff: %s", err) - } + s.ClusterService.Listener = mux.Listen(cluster.MuxHeader) + s.SnapshotterService.Listener = mux.Listen(snapshotter.MuxHeader) + s.CopierService.Listener = mux.Listen(copier.MuxHeader) - // Open the subcriber service - if err := s.Subscriber.Open(); err != nil { - return fmt.Errorf("open subscriber: %s", err) - } + // Open TSDB store. + if err := s.TSDBStore.Open(); err != nil { + return fmt.Errorf("open tsdb store: %s", err) + } - // Open the points writer service - if err := s.PointsWriter.Open(); err != nil { - return fmt.Errorf("open points writer: %s", err) - } + // Open the subcriber service + if err := s.Subscriber.Open(); err != nil { + return fmt.Errorf("open subscriber: %s", err) + } - // Open the monitor service - if err := s.Monitor.Open(); err != nil { - return fmt.Errorf("open monitor: %v", err) - } + // Open the points writer service + if err := s.PointsWriter.Open(); err != nil { + return fmt.Errorf("open points writer: %s", err) + } - for _, service := range s.Services { - if err := service.Open(); err != nil { - return fmt.Errorf("open service: %s", err) - } + // Open the monitor service + if err := s.Monitor.Open(); err != nil { + return fmt.Errorf("open monitor: %v", err) + } + + for _, service := range s.Services { + if err := service.Open(); err != nil { + return fmt.Errorf("open service: %s", err) } } @@ -509,10 +444,6 @@ func (s *Server) Close() error { s.PointsWriter.Close() } - if s.HintedHandoff != nil { - s.HintedHandoff.Close() - } - // Close the TSDBStore, no more reads or writes at this point if s.TSDBStore != nil { s.TSDBStore.Close() @@ -522,11 +453,6 @@ func (s *Server) Close() error { s.Subscriber.Close() } - // Finally close the meta-store since everything else depends on it - if s.MetaService != nil { - s.MetaService.Close() - } - if s.MetaClient != nil { s.MetaClient.Close() } @@ -589,7 +515,6 @@ func (s *Server) reportServer() { "os": runtime.GOOS, "arch": runtime.GOARCH, "version": s.buildInfo.Version, - "server_id": fmt.Sprintf("%v", s.Node.ID), "cluster_id": fmt.Sprintf("%v", clusterID), "num_series": numSeries, "num_measurements": numMeasurements, @@ -620,53 +545,6 @@ func (s *Server) monitorErrorChan(ch <-chan error) { } } -// initializeMetaClient will set the MetaClient and join the node to the cluster if needed -func (s *Server) initializeMetaClient() error { - // It's the first time starting up and we need to either join - // the cluster or initialize this node as the first member - if len(s.joinPeers) == 0 { - // start up a new single node cluster - if s.MetaService == nil { - return fmt.Errorf("server not set to join existing cluster must run also as a meta node") - } - s.MetaClient.SetMetaServers([]string{s.MetaService.HTTPAddr()}) - s.MetaClient.SetTLS(s.metaUseTLS) - } else { - // join this node to the cluster - s.MetaClient.SetMetaServers(s.joinPeers) - s.MetaClient.SetTLS(s.metaUseTLS) - } - if err := s.MetaClient.Open(); err != nil { - return 
err - } - - // if the node ID is > 0 then we need to initialize the metaclient - if s.Node.ID > 0 { - s.MetaClient.WaitForDataChanged() - } - - if s.config.Data.Enabled { - // If we've already created a data node for our id, we're done - if _, err := s.MetaClient.DataNode(s.Node.ID); err == nil { - return nil - } - - n, err := s.MetaClient.CreateDataNode(s.HTTPAddr(), s.TCPAddr()) - for err != nil { - log.Printf("Unable to create data node. retry in 1s: %s", err.Error()) - time.Sleep(time.Second) - n, err = s.MetaClient.CreateDataNode(s.HTTPAddr(), s.TCPAddr()) - } - s.Node.ID = n.ID - - if err := s.Node.Save(); err != nil { - return err - } - } - - return nil -} - // HTTPAddr returns the HTTP address used by other nodes for HTTP queries and writes. func (s *Server) HTTPAddr() string { return s.remoteAddr(s.httpAPIAddr) @@ -677,21 +555,9 @@ func (s *Server) TCPAddr() string { return s.remoteAddr(s.tcpAddr) } -func (s *Server) remoteAddr(addr string) string { - hostname := s.config.Hostname - if hostname == "" { - hostname = meta.DefaultHostname - } - remote, err := meta.DefaultHost(hostname, addr) - if err != nil { - return addr - } - return remote -} - // MetaServers returns the meta node HTTP addresses used by this server. func (s *Server) MetaServers() []string { - return s.MetaClient.MetaServers() + return []string{s.HTTPAddr()} } // Service represents a service attached to the server. @@ -755,9 +621,29 @@ type monitorPointsWriter cluster.PointsWriter func (pw *monitorPointsWriter) WritePoints(database, retentionPolicy string, points models.Points) error { return (*cluster.PointsWriter)(pw).WritePoints(&cluster.WritePointsRequest{ - Database: database, - RetentionPolicy: retentionPolicy, - ConsistencyLevel: cluster.ConsistencyLevelOne, - Points: points, + Database: database, + RetentionPolicy: retentionPolicy, + Points: points, }) } + +func (s *Server) remoteAddr(addr string) string { + hostname := s.config.Hostname + remote, err := DefaultHost(hostname, addr) + if err != nil { + return addr + } + return remote +} + +func DefaultHost(hostname, addr string) (string, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return "", err + } + + if host == "" || host == "0.0.0.0" || host == "::" { + return net.JoinHostPort(hostname, port), nil + } + return addr, nil +} diff --git a/cmd/influxd/run/server_helpers_test.go b/cmd/influxd/run/server_helpers_test.go index a5bee471d2d..b46176851ef 100644 --- a/cmd/influxd/run/server_helpers_test.go +++ b/cmd/influxd/run/server_helpers_test.go @@ -49,10 +49,7 @@ func NewServer(c *run.Config) *Server { } // OpenServer opens a test server. -func OpenServer(c *run.Config, joinURLs string) *Server { - if len(joinURLs) > 0 { - c.Meta.JoinPeers = strings.Split(joinURLs, ",") - } +func OpenServer(c *run.Config) *Server { s := NewServer(c) configureLogging(s) if err := s.Open(); err != nil { @@ -68,7 +65,6 @@ func OpenServerWithVersion(c *run.Config, version string) *Server { Commit: "", Branch: "", } - fmt.Println(">>> ", c.Data.Enabled) srv, _ := run.NewServer(c, buildInfo) s := Server{ Server: srv, @@ -83,8 +79,8 @@ func OpenServerWithVersion(c *run.Config, version string) *Server { } // OpenDefaultServer opens a test server with a default database & retention policy. 
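// Behavior of the DefaultHost helper added above, for reference (illustrative,
// not part of the patch): the configured hostname is substituted only when the
// bind address carries no concrete host.
//
//	DefaultHost("node1", "0.0.0.0:8088")  // -> "node1:8088"
//	DefaultHost("node1", ":8086")         // -> "node1:8086"
//	DefaultHost("node1", "10.0.0.5:8086") // -> "10.0.0.5:8086", returned unchanged
//	DefaultHost("node1", "no-port")       // -> error from net.SplitHostPort
//
// remoteAddr falls back to the raw address on error, so HTTPAddr and TCPAddr
// always return something usable.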
-func OpenDefaultServer(c *run.Config, joinURLs string) *Server { - s := OpenServer(c, joinURLs) +func OpenDefaultServer(c *run.Config) *Server { + s := OpenServer(c) if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { panic(err) } @@ -105,9 +101,6 @@ func (s *Server) Close() { if err := os.RemoveAll(s.Config.Data.Dir); err != nil { panic(err.Error()) } - if err := os.RemoveAll(s.Config.HintedHandoff.Dir); err != nil { - panic(err.Error()) - } } // URL returns the base URL for the httpd endpoint. @@ -238,16 +231,11 @@ func (s *Server) MustWrite(db, rp, body string, params url.Values) string { // NewConfig returns the default config with temporary paths. func NewConfig() *run.Config { c := run.NewConfig() + c.BindAddress = "127.0.0.1:0" c.ReportingDisabled = true c.Cluster.ShardWriterTimeout = toml.Duration(30 * time.Second) c.Cluster.WriteTimeout = toml.Duration(30 * time.Second) c.Meta.Dir = MustTempFile() - c.Meta.BindAddress = "127.0.0.1:0" - c.Meta.HTTPBindAddress = "127.0.0.1:0" - c.Meta.HeartbeatTimeout = toml.Duration(50 * time.Millisecond) - c.Meta.ElectionTimeout = toml.Duration(50 * time.Millisecond) - c.Meta.LeaderLeaseTimeout = toml.Duration(50 * time.Millisecond) - c.Meta.CommitTimeout = toml.Duration(5 * time.Millisecond) if !testing.Verbose() { c.Meta.LoggingEnabled = false @@ -257,8 +245,6 @@ func NewConfig() *run.Config { c.Data.WALDir = MustTempFile() c.Data.WALLoggingEnabled = false - c.HintedHandoff.Dir = MustTempFile() - c.HTTPD.Enabled = true c.HTTPD.BindAddress = "127.0.0.1:0" c.HTTPD.LogEnabled = testing.Verbose() @@ -499,7 +485,6 @@ func configureLogging(s *Server) { } nullLogger := log.New(ioutil.Discard, "", 0) s.TSDBStore.Logger = nullLogger - s.HintedHandoff.SetLogger(nullLogger) s.Monitor.SetLogger(nullLogger) s.QueryExecutor.LogOutput = ioutil.Discard s.Subscriber.SetLogger(nullLogger) @@ -517,11 +502,10 @@ type Cluster struct { func NewCluster(size int) (*Cluster, error) { c := Cluster{} - c.Servers = append(c.Servers, OpenServer(NewConfig(), "")) - metaServiceAddr := c.Servers[0].MetaServers()[0] + c.Servers = append(c.Servers, OpenServer(NewConfig())) for i := 1; i < size; i++ { - c.Servers = append(c.Servers, OpenServer(NewConfig(), metaServiceAddr)) + c.Servers = append(c.Servers, OpenServer(NewConfig())) } for _, s := range c.Servers { @@ -593,13 +577,12 @@ func NewClusterCustom(size int, cb func(index int, config *run.Config)) (*Cluste config := NewConfig() cb(0, config) - c.Servers = append(c.Servers, OpenServer(config, "")) - metaServiceAddr := c.Servers[0].MetaServers()[0] + c.Servers = append(c.Servers, OpenServer(config)) for i := 1; i < size; i++ { config := NewConfig() cb(i, config) - c.Servers = append(c.Servers, OpenServer(config, metaServiceAddr)) + c.Servers = append(c.Servers, OpenServer(config)) } for _, s := range c.Servers { diff --git a/cmd/influxd/run/server_test.go b/cmd/influxd/run/server_test.go index 69344a59cd6..63a0d1d4fa1 100644 --- a/cmd/influxd/run/server_test.go +++ b/cmd/influxd/run/server_test.go @@ -28,7 +28,7 @@ func TestServer_HTTPResponseVersion(t *testing.T) { // Ensure the database commands work. 
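// Aside (not from the patch): the test config can hard-code "127.0.0.1:0"
// because port 0 asks the OS for a free ephemeral port. A minimal sketch of
// that idiom; the helper name is hypothetical (imports: fmt, log, net):
func exampleEphemeralPort() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		log.Fatal(err)
	}
	defer ln.Close()
	fmt.Println(ln.Addr().String()) // e.g. "127.0.0.1:53412", chosen by the OS
}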
func TestServer_DatabaseCommands(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() test := tests.load(t, "database_commands") @@ -48,7 +48,7 @@ func TestServer_DatabaseCommands(t *testing.T) { func TestServer_Query_DropAndRecreateDatabase(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() test := tests.load(t, "drop_and_recreate_database") @@ -80,7 +80,7 @@ func TestServer_Query_DropAndRecreateDatabase(t *testing.T) { func TestServer_Query_DropDatabaseIsolated(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() test := tests.load(t, "drop_database_isolated") @@ -115,7 +115,7 @@ func TestServer_Query_DropDatabaseIsolated(t *testing.T) { func TestServer_Query_DropAndRecreateSeries(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() test := tests.load(t, "drop_and_recreate_series") @@ -167,7 +167,7 @@ func TestServer_Query_DropAndRecreateSeries(t *testing.T) { func TestServer_Query_DropSeriesFromRegex(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() test := tests.load(t, "drop_series_from_regex") @@ -202,7 +202,7 @@ func TestServer_RetentionPolicyCommands(t *testing.T) { t.Parallel() c := NewConfig() c.Meta.RetentionAutoCreate = false - s := OpenServer(c, "") + s := OpenServer(c) defer s.Close() test := tests.load(t, "retention_policy_commands") @@ -228,7 +228,7 @@ func TestServer_RetentionPolicyCommands(t *testing.T) { // Ensure the autocreation of retention policy works. func TestServer_DatabaseRetentionPolicyAutoCreate(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() test := tests.load(t, "retention_policy_auto_create") @@ -249,7 +249,7 @@ func TestServer_DatabaseRetentionPolicyAutoCreate(t *testing.T) { // Ensure user commands work. func TestServer_UserCommands(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() // Create a database. @@ -343,7 +343,7 @@ func TestServer_UserCommands(t *testing.T) { // Ensure the server rejects a single point via json protocol by default. func TestServer_Write_JSON(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -372,7 +372,7 @@ func TestServer_Write_JSON_Enabled(t *testing.T) { t.Parallel() c := NewConfig() c.HTTPD.JSONWriteEnabled = true - s := OpenServer(c, "") + s := OpenServer(c) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -397,7 +397,7 @@ func TestServer_Write_JSON_Enabled(t *testing.T) { // Ensure the server can create a single point via line protocol with float type and read it back. func TestServer_Write_LineProtocol_Float(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -422,7 +422,7 @@ func TestServer_Write_LineProtocol_Float(t *testing.T) { // Ensure the server can create a single point via line protocol with bool type and read it back. 
func TestServer_Write_LineProtocol_Bool(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -447,7 +447,7 @@ func TestServer_Write_LineProtocol_Bool(t *testing.T) { // Ensure the server can create a single point via line protocol with string type and read it back. func TestServer_Write_LineProtocol_String(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -472,7 +472,7 @@ func TestServer_Write_LineProtocol_String(t *testing.T) { // Ensure the server can create a single point via line protocol with integer type and read it back. func TestServer_Write_LineProtocol_Integer(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -494,43 +494,10 @@ func TestServer_Write_LineProtocol_Integer(t *testing.T) { } } -// Ensure the server returns a partial write response when some points fail to parse. Also validate that -// the successfully parsed points can be queried. -func TestServer_Write_LineProtocol_Partial(t *testing.T) { - t.Parallel() - s := OpenServer(NewConfig(), "") - defer s.Close() - - if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { - t.Fatal(err) - } - - now := now() - points := []string{ - "cpu,host=server01 value=100 " + strconv.FormatInt(now.UnixNano(), 10), - "cpu,host=server01 value=NaN " + strconv.FormatInt(now.UnixNano(), 20), - "cpu,host=server01 value=NaN " + strconv.FormatInt(now.UnixNano(), 30), - } - if res, err := s.Write("db0", "rp0", strings.Join(points, "\n"), nil); err == nil { - t.Fatal("expected error. got nil", err) - } else if exp := ``; exp != res { - t.Fatalf("unexpected results\nexp: %s\ngot: %s\n", exp, res) - } else if exp := "partial write"; !strings.Contains(err.Error(), exp) { - t.Fatalf("unexpected error: exp\nexp: %v\ngot: %v", exp, err) - } - - // Verify the data was written. - if res, err := s.Query(`SELECT * FROM db0.rp0.cpu GROUP BY *`); err != nil { - t.Fatal(err) - } else if exp := fmt.Sprintf(`{"results":[{"series":[{"name":"cpu","tags":{"host":"server01"},"columns":["time","value"],"values":[["%s",100]]}]}]}`, now.Format(time.RFC3339Nano)); exp != res { - t.Fatalf("unexpected results\nexp: %s\ngot: %s\n", exp, res) - } -} - // Ensure the server can query with default databases (via param) and default retention policy func TestServer_Query_DefaultDBAndRP(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -591,7 +558,7 @@ func TestServer_Query_DefaultDBAndRP(t *testing.T) { // Ensure the server can have a database with multiple measurements. 
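// Aside, not part of the diff: these write tests hand-build line protocol
// bodies, one point per line, as "measurement,tags fields timestamp-in-ns".
// A hypothetical helper showing the same pattern (imports: fmt, time):
func examplePoint() string {
	now := time.Now().UTC()
	// e.g. "cpu,host=server01 value=100 1457000000000000000"
	return fmt.Sprintf("cpu,host=server01 value=100 %d", now.UnixNano())
}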
func TestServer_Query_Multiple_Measurements(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -641,7 +608,7 @@ func TestServer_Query_Multiple_Measurements(t *testing.T) { // Ensure the server correctly supports data with identical tag values. func TestServer_Query_IdenticalTagValues(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -696,7 +663,7 @@ func TestServer_Query_IdenticalTagValues(t *testing.T) { // Ensure the server can handle a query that involves accessing no shards. func TestServer_Query_NoShards(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -738,7 +705,7 @@ func TestServer_Query_NoShards(t *testing.T) { // Ensure the server can query a non-existent field func TestServer_Query_NonExistent(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -785,7 +752,7 @@ func TestServer_Query_NonExistent(t *testing.T) { // Ensure the server can perform basic math func TestServer_Query_Math(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db", newRetentionPolicyInfo("rp", 1, 1*time.Hour)); err != nil { @@ -881,7 +848,7 @@ func TestServer_Query_Math(t *testing.T) { // Ensure the server can query with the count aggregate function func TestServer_Query_Count(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -955,7 +922,7 @@ func TestServer_Query_Count(t *testing.T) { // Ensure the server can query with Now(). func TestServer_Query_Now(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1012,7 +979,7 @@ func TestServer_Query_Now(t *testing.T) { // Ensure the server can query with epoch precisions. func TestServer_Query_EpochPrecision(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1085,7 +1052,7 @@ func TestServer_Query_EpochPrecision(t *testing.T) { // Ensure the server works with tag queries. func TestServer_Query_Tags(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1266,7 +1233,7 @@ func TestServer_Query_Tags(t *testing.T) { // Ensure the server correctly queries with an alias. 
func TestServer_Query_Alias(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1345,7 +1312,7 @@ func TestServer_Query_Alias(t *testing.T) { // Ensure the server will succeed and error for common scenarios. func TestServer_Query_Common(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1422,7 +1389,7 @@ func TestServer_Query_Common(t *testing.T) { // Ensure the server can query two points. func TestServer_Query_SelectTwoPoints(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1470,7 +1437,7 @@ func TestServer_Query_SelectTwoPoints(t *testing.T) { // Ensure the server can query two negative points. func TestServer_Query_SelectTwoNegativePoints(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1511,7 +1478,7 @@ func TestServer_Query_SelectTwoNegativePoints(t *testing.T) { // Ensure the server can query with relative time. func TestServer_Query_SelectRelativeTime(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1560,7 +1527,7 @@ func TestServer_Query_SelectRelativeTime(t *testing.T) { // Ensure the server can handle various simple derivative queries. func TestServer_Query_SelectRawDerivative(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1606,7 +1573,7 @@ func TestServer_Query_SelectRawDerivative(t *testing.T) { // Ensure the server can handle various simple non_negative_derivative queries. func TestServer_Query_SelectRawNonNegativeDerivative(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1656,7 +1623,7 @@ cpu value=20 1278010024000000000 // Ensure the server can handle various group by time derivative queries. func TestServer_Query_SelectGroupByTimeDerivative(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -1786,7 +1753,7 @@ cpu value=25 1278010023000000000 // Ensure the server can handle various group by time derivative queries. 
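// Reminder of the semantics these derivative tests exercise (general InfluxQL
// behavior, not taken from the fixtures above): derivative(value, unit)
// divides the change between consecutive values by the elapsed time expressed
// in units of `unit`. For points (t=10s, value=10) and (t=20s, value=30):
//
//	SELECT derivative(value, 1s) FROM cpu
//	-- (30 - 10) / ((20s - 10s) / 1s) = 2
//
// non_negative_derivative performs the same calculation but drops negative results.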
func TestServer_Query_SelectGroupByTimeDerivativeWithFill(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { @@ -2005,7 +1972,7 @@ cpu value=20 1278010021000000000 // of points than others in a group by interval the results are correct func TestServer_Query_MergeMany(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() // set infinite retention policy as we are inserting data in the past and don't want retention policy enforcement to make this test racy @@ -2065,7 +2032,7 @@ func TestServer_Query_MergeMany(t *testing.T) { func TestServer_Query_SLimitAndSOffset(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() // set infinite retention policy as we are inserting data in the past and don't want retention policy enforcement to make this test racy @@ -2122,7 +2089,7 @@ func TestServer_Query_SLimitAndSOffset(t *testing.T) { func TestServer_Query_Regex(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -2194,7 +2161,7 @@ func TestServer_Query_Regex(t *testing.T) { func TestServer_Query_Aggregates_Int(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2234,7 +2201,7 @@ func TestServer_Query_Aggregates_Int(t *testing.T) { func TestServer_Query_Aggregates_IntMax(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2274,7 +2241,7 @@ func TestServer_Query_Aggregates_IntMax(t *testing.T) { func TestServer_Query_Aggregates_IntMany(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2408,7 +2375,7 @@ func TestServer_Query_Aggregates_IntMany(t *testing.T) { func TestServer_Query_Aggregates_IntMany_GroupBy(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2502,7 +2469,7 @@ func TestServer_Query_Aggregates_IntMany_GroupBy(t *testing.T) { func TestServer_Query_Aggregates_IntMany_OrderByDesc(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2548,7 +2515,7 @@ func TestServer_Query_Aggregates_IntMany_OrderByDesc(t *testing.T) { func TestServer_Query_Aggregates_IntOverlap(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2615,7 +2582,7 @@ func TestServer_Query_Aggregates_IntOverlap(t *testing.T) { func TestServer_Query_Aggregates_FloatSingle(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2654,7 +2621,7 @@ func TestServer_Query_Aggregates_FloatSingle(t *testing.T) { func TestServer_Query_Aggregates_FloatMany(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) 
defer s.Close() test := NewTest("db0", "rp0") @@ -2782,7 +2749,7 @@ func TestServer_Query_Aggregates_FloatMany(t *testing.T) { func TestServer_Query_Aggregates_FloatOverlap(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2848,7 +2815,7 @@ func TestServer_Query_Aggregates_FloatOverlap(t *testing.T) { func TestServer_Query_Aggregates_Load(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2901,7 +2868,7 @@ func TestServer_Query_Aggregates_Load(t *testing.T) { func TestServer_Query_Aggregates_CPU(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -2941,7 +2908,7 @@ func TestServer_Query_Aggregates_CPU(t *testing.T) { func TestServer_Query_Aggregates_String(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() test := NewTest("db0", "rp0") @@ -3018,7 +2985,7 @@ func TestServer_Query_Aggregates_String(t *testing.T) { func TestServer_Query_AggregateSelectors(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3314,7 +3281,7 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { func TestServer_Query_TopInt(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3481,7 +3448,7 @@ func TestServer_Query_TopInt(t *testing.T) { // Test various aggregates when different series only have data for the same timestamp. 
func TestServer_Query_Aggregates_IdenticalTime(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3549,7 +3516,7 @@ func TestServer_Query_Aggregates_IdenticalTime(t *testing.T) { // but will only put the values in the bucket that match the time range func TestServer_Query_GroupByTimeCutoffs(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3631,7 +3598,7 @@ func TestServer_Query_GroupByTimeCutoffs(t *testing.T) { func TestServer_Write_Precision(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3748,7 +3715,7 @@ func TestServer_Write_Precision(t *testing.T) { func TestServer_Query_Wildcards(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3866,7 +3833,7 @@ func TestServer_Query_Wildcards(t *testing.T) { func TestServer_Query_WildcardExpansion(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -3946,7 +3913,7 @@ func TestServer_Query_WildcardExpansion(t *testing.T) { func TestServer_Query_AcrossShardsAndFields(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4020,7 +3987,7 @@ func TestServer_Query_AcrossShardsAndFields(t *testing.T) { func TestServer_Query_Where_Fields(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4242,7 +4209,7 @@ func TestServer_Query_Where_Fields(t *testing.T) { func TestServer_Query_Where_With_Tags(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4300,7 +4267,7 @@ func TestServer_Query_Where_With_Tags(t *testing.T) { func TestServer_Query_LimitAndOffset(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4405,7 +4372,7 @@ func TestServer_Query_LimitAndOffset(t *testing.T) { func TestServer_Query_Fill(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4504,7 +4471,7 @@ func TestServer_Query_Fill(t *testing.T) { func TestServer_Query_Chunk(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", 
newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4556,7 +4523,7 @@ func TestServer_Query_Chunk(t *testing.T) { func TestServer_Query_DropAndRecreateMeasurement(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4722,7 +4689,7 @@ func TestServer_Query_DropAndRecreateMeasurement(t *testing.T) { func TestServer_Query_ShowSeries(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4824,7 +4791,7 @@ func TestServer_Query_ShowSeries(t *testing.T) { func TestServer_Query_ShowMeasurements(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -4914,7 +4881,7 @@ func TestServer_Query_ShowMeasurements(t *testing.T) { func TestServer_Query_ShowTagKeys(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5034,7 +5001,7 @@ func TestServer_Query_ShowTagKeys(t *testing.T) { func TestServer_Query_ShowFieldKeys(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5101,7 +5068,7 @@ func TestServer_Query_ShowFieldKeys(t *testing.T) { func TestServer_ContinuousQuery(t *testing.T) { t.Skip() t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5230,7 +5197,7 @@ func TestServer_ContinuousQuery_Deadlock(t *testing.T) { t.Skip("skipping CQ deadlock test") } t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer func() { s.Close() // Nil the server so our deadlock detector goroutine can determine if we completed writes @@ -5308,7 +5275,7 @@ func TestServer_ContinuousQuery_Deadlock(t *testing.T) { func TestServer_Query_EvilIdentifiers(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5352,7 +5319,7 @@ func TestServer_Query_EvilIdentifiers(t *testing.T) { func TestServer_Query_OrderByTime(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5414,7 +5381,7 @@ func TestServer_Query_OrderByTime(t *testing.T) { func TestServer_Query_FieldWithMultiplePeriods(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5468,7 +5435,7 @@ func TestServer_Query_FieldWithMultiplePeriods(t *testing.T) { func TestServer_Query_FieldWithMultiplePeriodsMeasurementPrefixMatch(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := 
OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5522,7 +5489,7 @@ func TestServer_Query_FieldWithMultiplePeriodsMeasurementPrefixMatch(t *testing. func TestServer_Query_IntoTarget(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { @@ -5581,7 +5548,7 @@ func TestServer_Query_IntoTarget(t *testing.T) { // Subscriber points channel while writes were in-flight in the PointsWriter. func TestServer_ConcurrentPointsWriter_Subscriber(t *testing.T) { t.Parallel() - s := OpenDefaultServer(NewConfig(), "") + s := OpenDefaultServer(NewConfig()) defer s.Close() // goroutine to write points @@ -5610,7 +5577,7 @@ func TestServer_ConcurrentPointsWriter_Subscriber(t *testing.T) { // Ensure time in where clause is inclusive func TestServer_WhereTimeInclusive(t *testing.T) { t.Parallel() - s := OpenServer(NewConfig(), "") + s := OpenServer(NewConfig()) defer s.Close() if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 0)); err != nil { diff --git a/models/consistency.go b/models/consistency.go new file mode 100644 index 00000000000..97cdc51aa08 --- /dev/null +++ b/models/consistency.go @@ -0,0 +1,46 @@ +package models + +import ( + "errors" + "strings" +) + +// ConsistencyLevel represent a required replication criteria before a write can +// be returned as successful +type ConsistencyLevel int + +const ( + // ConsistencyLevelAny allows for hinted hand off, potentially no write happened yet + ConsistencyLevelAny ConsistencyLevel = iota + + // ConsistencyLevelOne requires at least one data node acknowledged a write + ConsistencyLevelOne + + // ConsistencyLevelQuorum requires a quorum of data nodes to acknowledge a write + ConsistencyLevelQuorum + + // ConsistencyLevelAll requires all data nodes to acknowledge a write + ConsistencyLevelAll +) + +var ( + // ErrInvalidConsistencyLevel is returned when parsing the string version + // of a consistency level. 
+ ErrInvalidConsistencyLevel = errors.New("invalid consistency level") +) + +// ParseConsistencyLevel converts a consistency level string to the corresponding ConsistencyLevel const +func ParseConsistencyLevel(level string) (ConsistencyLevel, error) { + switch strings.ToLower(level) { + case "any": + return ConsistencyLevelAny, nil + case "one": + return ConsistencyLevelOne, nil + case "quorum": + return ConsistencyLevelQuorum, nil + case "all": + return ConsistencyLevelAll, nil + default: + return 0, ErrInvalidConsistencyLevel + } +} diff --git a/services/collectd/service.go b/services/collectd/service.go index f3184f5380f..685413c73cb 100644 --- a/services/collectd/service.go +++ b/services/collectd/service.go @@ -245,10 +245,9 @@ func (s *Service) writePoints() { return case batch := <-s.batcher.Out(): if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.Config.Database, - RetentionPolicy: s.Config.RetentionPolicy, - ConsistencyLevel: cluster.ConsistencyLevelAny, - Points: batch, + Database: s.Config.Database, + RetentionPolicy: s.Config.RetentionPolicy, + Points: batch, }); err == nil { s.statMap.Add(statBatchesTrasmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) diff --git a/services/graphite/service.go b/services/graphite/service.go index f8f8b292b22..c756c8b0084 100644 --- a/services/graphite/service.go +++ b/services/graphite/service.go @@ -51,14 +51,13 @@ func (c *tcpConnection) Close() { type Service struct { mu sync.Mutex - bindAddress string - database string - protocol string - batchSize int - batchPending int - batchTimeout time.Duration - consistencyLevel cluster.ConsistencyLevel - udpReadBuffer int + bindAddress string + database string + protocol string + batchSize int + batchPending int + batchTimeout time.Duration + udpReadBuffer int batcher *tsdb.PointBatcher parser *Parser @@ -107,12 +106,6 @@ func NewService(c Config) (*Service, error) { diagsKey: strings.Join([]string{"graphite", d.Protocol, d.BindAddress}, ":"), } - consistencyLevel, err := cluster.ParseConsistencyLevel(d.ConsistencyLevel) - if err != nil { - return nil, err - } - s.consistencyLevel = consistencyLevel - parser, err := NewParserWithOptions(Options{ Templates: d.Templates, DefaultTags: d.DefaultTags(), @@ -361,10 +354,9 @@ func (s *Service) processBatches(batcher *tsdb.PointBatcher) { select { case batch := <-batcher.Out(): if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.database, - RetentionPolicy: "", - ConsistencyLevel: s.consistencyLevel, - Points: batch, + Database: s.database, + RetentionPolicy: "", + Points: batch, }); err == nil { s.statMap.Add(statBatchesTransmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) diff --git a/services/hh/config.go b/services/hh/config.go deleted file mode 100644 index f6f3003f2ee..00000000000 --- a/services/hh/config.go +++ /dev/null @@ -1,68 +0,0 @@ -package hh - -import ( - "errors" - "time" - - "github.com/influxdata/influxdb/toml" -) - -const ( - // DefaultMaxSize is the default maximum size of all hinted handoff queues in bytes. - DefaultMaxSize = 1024 * 1024 * 1024 - - // DefaultMaxAge is the default maximum amount of time that a hinted handoff write - // can stay in the queue. After this time, the write will be purged. - DefaultMaxAge = 7 * 24 * time.Hour - - // DefaultRetryRateLimit is the default rate that hinted handoffs will be retried. - // The rate is in bytes per second and applies across all nodes when retried. A - // value of 0 disables the rate limit. 
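// Usage sketch for the parser added in models/consistency.go above; the
// enclosing function is hypothetical and not part of this patch
// (import github.com/influxdata/influxdb/models):
func exampleParseConsistency() (models.ConsistencyLevel, error) {
	level, err := models.ParseConsistencyLevel("quorum")
	if err != nil {
		// err is models.ErrInvalidConsistencyLevel for unrecognized strings
		return 0, err
	}
	return level, nil // level == models.ConsistencyLevelQuorum
}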
- DefaultRetryRateLimit = 0 - - // DefaultRetryInterval is the default amount of time the system waits before - // attempting to flush hinted handoff queues. With each failure of a hinted - // handoff write, this retry interval increases exponentially until it reaches - // the maximum - DefaultRetryInterval = time.Second - - // DefaultRetryMaxInterval is the maximum the hinted handoff retry interval - // will ever be. - DefaultRetryMaxInterval = time.Minute - - // DefaultPurgeInterval is the amount of time the system waits before attempting - // to purge hinted handoff data due to age or inactive nodes. - DefaultPurgeInterval = time.Hour -) - -// Config is a hinted handoff configuration. -type Config struct { - Enabled bool `toml:"enabled"` - Dir string `toml:"dir"` - MaxSize int64 `toml:"max-size"` - MaxAge toml.Duration `toml:"max-age"` - RetryRateLimit int64 `toml:"retry-rate-limit"` - RetryInterval toml.Duration `toml:"retry-interval"` - RetryMaxInterval toml.Duration `toml:"retry-max-interval"` - PurgeInterval toml.Duration `toml:"purge-interval"` -} - -// NewConfig returns a new Config. -func NewConfig() Config { - return Config{ - Enabled: false, - MaxSize: DefaultMaxSize, - MaxAge: toml.Duration(DefaultMaxAge), - RetryRateLimit: DefaultRetryRateLimit, - RetryInterval: toml.Duration(DefaultRetryInterval), - RetryMaxInterval: toml.Duration(DefaultRetryMaxInterval), - PurgeInterval: toml.Duration(DefaultPurgeInterval), - } -} - -func (c *Config) Validate() error { - if c.Enabled && c.Dir == "" { - return errors.New("HintedHandoff.Dir must be specified") - } - return nil -} diff --git a/services/hh/config_test.go b/services/hh/config_test.go deleted file mode 100644 index 2f57a57b465..00000000000 --- a/services/hh/config_test.go +++ /dev/null @@ -1,73 +0,0 @@ -package hh_test - -import ( - "testing" - "time" - - "github.com/BurntSushi/toml" - "github.com/influxdata/influxdb/services/hh" -) - -func TestConfigParse(t *testing.T) { - // Parse configuration. - var c hh.Config - if _, err := toml.Decode(` -enabled = false -retry-interval = "10m" -retry-max-interval = "100m" -max-size=2048 -max-age="20m" -retry-rate-limit=1000 -purge-interval = "1h" -`, &c); err != nil { - t.Fatal(err) - } - - // Validate configuration. - if exp := true; c.Enabled == true { - t.Fatalf("unexpected enabled: got %v, exp %v", c.Enabled, exp) - } - - if exp := 10 * time.Minute; c.RetryInterval.String() != exp.String() { - t.Fatalf("unexpected retry interval: got %v, exp %v", c.RetryInterval, exp) - } - - if exp := 100 * time.Minute; c.RetryMaxInterval.String() != exp.String() { - t.Fatalf("unexpected retry max interval: got %v, exp %v", c.RetryMaxInterval, exp) - } - - if exp := 20 * time.Minute; c.MaxAge.String() != exp.String() { - t.Fatalf("unexpected max age: got %v, exp %v", c.MaxAge, exp) - } - - if exp := int64(2048); c.MaxSize != exp { - t.Fatalf("unexpected retry interval: got %v, exp %v", c.MaxSize, exp) - } - - if exp := int64(1000); c.RetryRateLimit != exp { - t.Fatalf("unexpected retry rate limit: got %v, exp %v", c.RetryRateLimit, exp) - } - - if exp := time.Hour; c.PurgeInterval.String() != exp.String() { - t.Fatalf("unexpected purge interval: got %v, exp %v", c.PurgeInterval, exp) - } - -} - -func TestDefaultDisabled(t *testing.T) { - // Parse empty configuration. 
- var c hh.Config - if _, err := toml.Decode(``, &c); err != nil { - t.Fatal(err) - } - - if exp := false; c.Enabled == true { - t.Fatalf("unexpected default Enabled value: got %v, exp %v", c.Enabled, exp) - } - - // Default configuration. - c = hh.NewConfig() - if exp := false; c.Enabled == true { - t.Fatalf("unexpected default enabled value: got %v, exp %v", c.Enabled, exp) - } -} diff --git a/services/hh/doc.go b/services/hh/doc.go deleted file mode 100644 index eeedacbad7a..00000000000 --- a/services/hh/doc.go +++ /dev/null @@ -1,5 +0,0 @@ -/* -Package hh implements a hinted handoff for writes - -*/ -package hh diff --git a/services/hh/limiter.go b/services/hh/limiter.go deleted file mode 100644 index b2a69f90d52..00000000000 --- a/services/hh/limiter.go +++ /dev/null @@ -1,61 +0,0 @@ -package hh - -import "time" - -type limiter struct { - count int64 - limit int64 - start time.Time - delay float64 -} - -// NewRateLimiter returns a new limiter configured to restrict a process to the limit per second. -// limit is the maximum amount that can be used per second. The limit should be > 0. A limit -// <= 0, will not limit the processes. -func NewRateLimiter(limit int64) *limiter { - return &limiter{ - start: time.Now(), - limit: limit, - delay: 0.5, - } -} - -// Update updates the amount used -func (t *limiter) Update(count int) { - t.count += int64(count) -} - -// Delay returns the amount of time, up to 1 second, that caller should wait -// to maintain the configured rate -func (t *limiter) Delay() time.Duration { - if t.limit > 0 { - - delta := time.Now().Sub(t.start).Seconds() - rate := int64(float64(t.count) / delta) - - // Determine how far off from the max rate we are - delayAdj := float64((t.limit - rate)) / float64(t.limit) - - // Don't adjust by more than 1 second at a time - delayAdj = t.clamp(delayAdj, -1, 1) - - t.delay -= delayAdj - if t.delay < 0 { - t.delay = 0 - } - - return time.Duration(t.delay) * time.Second - } - return time.Duration(0) -} - -func (t *limiter) clamp(value, min, max float64) float64 { - if value < min { - return min - } - - if value > max { - return max - } - return value -} diff --git a/services/hh/limiter_test.go b/services/hh/limiter_test.go deleted file mode 100644 index 5edb6e0bb70..00000000000 --- a/services/hh/limiter_test.go +++ /dev/null @@ -1,47 +0,0 @@ -package hh - -import ( - "testing" - "time" -) - -func TestLimiter(t *testing.T) { - l := NewRateLimiter(0) - l.Update(500) - if l.Delay().Nanoseconds() != 0 { - t.Errorf("limiter with no limit mismatch: got %v, exp 0", l.Delay()) - } -} - -func TestLimiterWithinLimit(t *testing.T) { - if testing.Short() { - t.Skip("Shipping TestLimiterWithinLimit") - } - - l := NewRateLimiter(1000) - for i := 0; i < 100; i++ { - // 50 ever 100ms = 500/s which should be within the rate - l.Update(50) - l.Delay() - time.Sleep(100 * time.Millisecond) - } - - // Should not have any delay - delay := l.Delay().Seconds() - if exp := int(0); int(delay) != exp { - t.Errorf("limiter rate mismatch: got %v, exp %v", int(delay), exp) - } - -} - -func TestLimiterExceeded(t *testing.T) { - l := NewRateLimiter(1000) - for i := 0; i < 10; i++ { - l.Update(200) - l.Delay() - } - delay := l.Delay().Seconds() - if int(delay) == 0 { - t.Errorf("limiter rate mismatch. 
expected non-zero delay") - } -} diff --git a/services/hh/node_processor.go b/services/hh/node_processor.go deleted file mode 100644 index a058aa879f1..00000000000 --- a/services/hh/node_processor.go +++ /dev/null @@ -1,295 +0,0 @@ -package hh - -import ( - "encoding/binary" - "expvar" - "fmt" - "io" - "log" - "os" - "strings" - "sync" - "time" - - "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/models" -) - -// NodeProcessor encapsulates a queue of hinted-handoff data for a node, and the -// transmission of the data to the node. -type NodeProcessor struct { - PurgeInterval time.Duration // Interval between periodic purge checks - RetryInterval time.Duration // Interval between periodic write-to-node attempts. - RetryMaxInterval time.Duration // Max interval between periodic write-to-node attempts. - MaxSize int64 // Maximum size an underlying queue can get. - MaxAge time.Duration // Maximum age queue data can get before purging. - RetryRateLimit int64 // Limits the rate data is sent to node. - nodeID uint64 - dir string - - mu sync.RWMutex - wg sync.WaitGroup - done chan struct{} - - queue *queue - meta metaClient - writer shardWriter - - statMap *expvar.Map - Logger *log.Logger -} - -// NewNodeProcessor returns a new NodeProcessor for the given node, using dir for -// the hinted-handoff data. -func NewNodeProcessor(nodeID uint64, dir string, w shardWriter, m metaClient) *NodeProcessor { - key := strings.Join([]string{"hh_processor", dir}, ":") - tags := map[string]string{"node": fmt.Sprintf("%d", nodeID), "path": dir} - - return &NodeProcessor{ - PurgeInterval: DefaultPurgeInterval, - RetryInterval: DefaultRetryInterval, - RetryMaxInterval: DefaultRetryMaxInterval, - MaxSize: DefaultMaxSize, - MaxAge: DefaultMaxAge, - nodeID: nodeID, - dir: dir, - writer: w, - meta: m, - statMap: influxdb.NewStatistics(key, "hh_processor", tags), - Logger: log.New(os.Stderr, "[handoff] ", log.LstdFlags), - } -} - -// Open opens the NodeProcessor. It will read and write data present in dir, and -// start transmitting data to the node. A NodeProcessor must be opened before it -// can accept hinted data. -func (n *NodeProcessor) Open() error { - n.mu.Lock() - defer n.mu.Unlock() - - if n.done != nil { - // Already open. - return nil - } - n.done = make(chan struct{}) - - // Create the queue directory if it doesn't already exist. - if err := os.MkdirAll(n.dir, 0700); err != nil { - return fmt.Errorf("mkdir all: %s", err) - } - - // Create the queue of hinted-handoff data. - queue, err := newQueue(n.dir, n.MaxSize) - if err != nil { - return err - } - if err := queue.Open(); err != nil { - return err - } - n.queue = queue - - n.wg.Add(1) - go n.run() - - return nil -} - -// Close closes the NodeProcessor, terminating all data tranmission to the node. -// When closed it will not accept hinted-handoff data. -func (n *NodeProcessor) Close() error { - n.mu.Lock() - defer n.mu.Unlock() - - if n.done == nil { - // Already closed. - return nil - } - - close(n.done) - n.wg.Wait() - n.done = nil - - return n.queue.Close() -} - -// Purge deletes all hinted-handoff data under management by a NodeProcessor. -// The NodeProcessor should be in the closed state before calling this function. -func (n *NodeProcessor) Purge() error { - n.mu.Lock() - defer n.mu.Unlock() - - if n.done != nil { - return fmt.Errorf("node processor is open") - } - - return os.RemoveAll(n.dir) -} - -// WriteShard writes hinted-handoff data for the given shard and node. 
Since it may manipulate -// hinted-handoff queues, and be called concurrently, it takes a lock during queue access. -func (n *NodeProcessor) WriteShard(shardID uint64, points []models.Point) error { - n.mu.RLock() - defer n.mu.RUnlock() - - if n.done == nil { - return fmt.Errorf("node processor is closed") - } - - n.statMap.Add(writeShardReq, 1) - n.statMap.Add(writeShardReqPoints, int64(len(points))) - - b := marshalWrite(shardID, points) - return n.queue.Append(b) -} - -// LastModified returns the time the NodeProcessor last receieved hinted-handoff data. -func (n *NodeProcessor) LastModified() (time.Time, error) { - t, err := n.queue.LastModified() - if err != nil { - return time.Time{}, err - } - return t.UTC(), nil -} - -// run attempts to send any existing hinted handoff data to the target node. It also purges -// any hinted handoff data older than the configured time. -func (n *NodeProcessor) run() { - defer n.wg.Done() - - currInterval := time.Duration(n.RetryInterval) - if currInterval > time.Duration(n.RetryMaxInterval) { - currInterval = time.Duration(n.RetryMaxInterval) - } - - for { - select { - case <-n.done: - return - - case <-time.After(n.PurgeInterval): - if err := n.queue.PurgeOlderThan(time.Now().Add(-n.MaxAge)); err != nil { - n.Logger.Printf("failed to purge for node %d: %s", n.nodeID, err.Error()) - } - - case <-time.After(currInterval): - limiter := NewRateLimiter(n.RetryRateLimit) - for { - c, err := n.SendWrite() - if err != nil { - if err == io.EOF { - // No more data, return to configured interval - currInterval = time.Duration(n.RetryInterval) - } else { - currInterval = currInterval * 2 - if currInterval > time.Duration(n.RetryMaxInterval) { - currInterval = time.Duration(n.RetryMaxInterval) - } - } - break - } - - // Success! Ensure backoff is cancelled. - currInterval = time.Duration(n.RetryInterval) - - // Update how many bytes we've sent - limiter.Update(c) - - // Block to maintain the throughput rate - time.Sleep(limiter.Delay()) - } - } - } -} - -// SendWrite attempts to sent the current block of hinted data to the target node. If successful, -// it returns the number of bytes it sent and advances to the next block. Otherwise returns EOF -// when there is no more data or the node is inactive. -func (n *NodeProcessor) SendWrite() (int, error) { - n.mu.RLock() - defer n.mu.RUnlock() - - active, err := n.Active() - if err != nil { - return 0, err - } - if !active { - return 0, io.EOF - } - - // Get the current block from the queue - buf, err := n.queue.Current() - if err != nil { - return 0, err - } - - // unmarshal the byte slice back to shard ID and points - shardID, points, err := unmarshalWrite(buf) - if err != nil { - n.Logger.Printf("unmarshal write failed: %v", err) - // Try to skip it. - if err := n.queue.Advance(); err != nil { - n.Logger.Printf("failed to advance queue for node %d: %s", n.nodeID, err.Error()) - } - return 0, err - } - - if err := n.writer.WriteShard(shardID, n.nodeID, points); err != nil { - n.statMap.Add(writeNodeReqFail, 1) - return 0, err - } - n.statMap.Add(writeNodeReq, 1) - n.statMap.Add(writeNodeReqPoints, int64(len(points))) - - if err := n.queue.Advance(); err != nil { - n.Logger.Printf("failed to advance queue for node %d: %s", n.nodeID, err.Error()) - } - - return len(buf), nil -} - -// Head returns the head of the processor's queue. -func (n *NodeProcessor) Head() string { - qp, err := n.queue.Position() - if err != nil { - return "" - } - return qp.head -} - -// Tail returns the tail of the processor's queue. 
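// Simplified sketch of the retry pattern the removed run() implemented, for
// reference only; send, retryInterval, and retryMaxInterval are placeholders,
// not real API (imports: time):
func retryLoop(send func() (int, error), limiter *limiter, retryInterval, retryMaxInterval time.Duration) {
	interval := retryInterval
	for {
		sent, err := send() // attempt one block of queued hinted-handoff data
		if err != nil {
			interval *= 2 // back off exponentially on failure...
			if interval > retryMaxInterval {
				interval = retryMaxInterval // ...capped at the max interval
			}
			time.Sleep(interval)
			continue
		}
		interval = retryInterval    // success resets the backoff
		limiter.Update(sent)        // account for bytes sent
		time.Sleep(limiter.Delay()) // and stay under the configured rate limit
	}
}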
-func (n *NodeProcessor) Tail() string { - qp, err := n.queue.Position() - if err != nil { - return "" - } - return qp.tail -} - -// Active returns whether this node processor is for a currently active node. -func (n *NodeProcessor) Active() (bool, error) { - nio, err := n.meta.DataNode(n.nodeID) - if err != nil { - n.Logger.Printf("failed to determine if node %d is active: %s", n.nodeID, err.Error()) - return false, err - } - return nio != nil, nil -} - -func marshalWrite(shardID uint64, points []models.Point) []byte { - b := make([]byte, 8) - binary.BigEndian.PutUint64(b, shardID) - for _, p := range points { - b = append(b, []byte(p.String())...) - b = append(b, '\n') - } - return b -} - -func unmarshalWrite(b []byte) (uint64, []models.Point, error) { - if len(b) < 8 { - return 0, nil, fmt.Errorf("too short: len = %d", len(b)) - } - ownerID := binary.BigEndian.Uint64(b[:8]) - points, err := models.ParsePoints(b[8:]) - return ownerID, points, err -} diff --git a/services/hh/node_processor_test.go b/services/hh/node_processor_test.go deleted file mode 100644 index ab1f7095129..00000000000 --- a/services/hh/node_processor_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package hh - -import ( - "io" - "io/ioutil" - "os" - "testing" - "time" - - "github.com/influxdata/influxdb/models" - "github.com/influxdata/influxdb/services/meta" -) - -type fakeShardWriter struct { - ShardWriteFn func(shardID, nodeID uint64, points []models.Point) error -} - -func (f *fakeShardWriter) WriteShard(shardID, nodeID uint64, points []models.Point) error { - return f.ShardWriteFn(shardID, nodeID, points) -} - -type fakeMetaStore struct { - NodeFn func(nodeID uint64) (*meta.NodeInfo, error) -} - -func (f *fakeMetaStore) DataNode(nodeID uint64) (*meta.NodeInfo, error) { - return f.NodeFn(nodeID) -} - -func TestNodeProcessorSendBlock(t *testing.T) { - dir, err := ioutil.TempDir("", "node_processor_test") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - - // expected data to be queue and sent to the shardWriter - var expShardID, expNodeID, count = uint64(100), uint64(200), 0 - pt := models.MustNewPoint("cpu", models.Tags{"foo": "bar"}, models.Fields{"value": 1.0}, time.Unix(0, 0)) - - sh := &fakeShardWriter{ - ShardWriteFn: func(shardID, nodeID uint64, points []models.Point) error { - count++ - if shardID != expShardID { - t.Errorf("SendWrite() shardID mismatch: got %v, exp %v", shardID, expShardID) - } - if nodeID != expNodeID { - t.Errorf("SendWrite() nodeID mismatch: got %v, exp %v", nodeID, expNodeID) - } - - if exp := 1; len(points) != exp { - t.Fatalf("SendWrite() points mismatch: got %v, exp %v", len(points), exp) - } - - if points[0].String() != pt.String() { - t.Fatalf("SendWrite() points mismatch:\n got %v\n exp %v", points[0].String(), pt.String()) - } - - return nil - }, - } - metastore := &fakeMetaStore{ - NodeFn: func(nodeID uint64) (*meta.NodeInfo, error) { - if nodeID == expNodeID { - return &meta.NodeInfo{}, nil - } - return nil, nil - }, - } - - n := NewNodeProcessor(expNodeID, dir, sh, metastore) - if n == nil { - t.Fatalf("Failed to create node processor: %v", err) - } - - if err := n.Open(); err != nil { - t.Fatalf("Failed to open node processor: %v", err) - } - - // Check the active state. - active, err := n.Active() - if err != nil { - t.Fatalf("Failed to check node processor state: %v", err) - } - if !active { - t.Fatalf("Node processor state is unexpected value of: %v", active) - } - - // This should queue a write for the active node. 
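// The disk-backed queue removed below (services/hh/queue.go) exposed a small
// append / read / advance API. An illustrative driver; the directory, size,
// and payload are assumptions, not values from this patch:
func exampleQueueUse() error {
	q, err := newQueue("/tmp/hh/1", 1024*1024*1024) // dir, max total size in bytes
	if err != nil {
		return err
	}
	if err := q.Open(); err != nil {
		return err
	}
	defer q.Close()

	if err := q.Append([]byte("block of hinted writes")); err != nil { // enqueue at the tail
		return err
	}
	buf, err := q.Current() // re-readable head block
	if err != nil {
		return err
	}
	_ = buf
	return q.Advance() // move past it; fully-read segments can then be reclaimed
}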
- if err := n.WriteShard(expShardID, []models.Point{pt}); err != nil { - t.Fatalf("SendWrite() failed to write points: %v", err) - } - - // This should send the write to the shard writer - if _, err := n.SendWrite(); err != nil { - t.Fatalf("SendWrite() failed to write points: %v", err) - } - - if exp := 1; count != exp { - t.Fatalf("SendWrite() write count mismatch: got %v, exp %v", count, exp) - } - - // All data should have been handled so no writes should be sent again - if _, err := n.SendWrite(); err != nil && err != io.EOF { - t.Fatalf("SendWrite() failed to write points: %v", err) - } - - // Count should stay the same - if exp := 1; count != exp { - t.Fatalf("SendWrite() write count mismatch: got %v, exp %v", count, exp) - } - - // Make the node inactive. - sh.ShardWriteFn = func(shardID, nodeID uint64, points []models.Point) error { - t.Fatalf("write sent to inactive node") - return nil - } - metastore.NodeFn = func(nodeID uint64) (*meta.NodeInfo, error) { - return nil, nil - } - - // Check the active state. - active, err = n.Active() - if err != nil { - t.Fatalf("Failed to check node processor state: %v", err) - } - if active { - t.Fatalf("Node processor state is unexpected value of: %v", active) - } - - // This should queue a write for the node. - if err := n.WriteShard(expShardID, []models.Point{pt}); err != nil { - t.Fatalf("SendWrite() failed to write points: %v", err) - } - - // This should not send the write to the shard writer since the node is inactive. - if _, err := n.SendWrite(); err != nil && err != io.EOF { - t.Fatalf("SendWrite() failed to write points: %v", err) - } - - if exp := 1; count != exp { - t.Fatalf("SendWrite() write count mismatch: got %v, exp %v", count, exp) - } - - if err := n.Close(); err != nil { - t.Fatalf("Failed to close node processor: %v", err) - } - - // Confirm that purging works ok. - if err := n.Purge(); err != nil { - t.Fatalf("Failed to purge node processor: %v", err) - } - if _, err := os.Stat(dir); !os.IsNotExist(err) { - t.Fatalf("Node processor directory still present after purge") - } -} diff --git a/services/hh/queue.go b/services/hh/queue.go deleted file mode 100644 index a21c9480fa5..00000000000 --- a/services/hh/queue.go +++ /dev/null @@ -1,710 +0,0 @@ -package hh - -import ( - "encoding/binary" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "sync" - "time" -) - -// Possible errors returned by a hinted handoff queue. -var ( - ErrNotOpen = fmt.Errorf("queue not open") - ErrQueueFull = fmt.Errorf("queue is full") - ErrSegmentFull = fmt.Errorf("segment is full") -) - -const ( - defaultSegmentSize = 10 * 1024 * 1024 - footerSize = 8 -) - -// queue is a bounded, disk-backed, append-only type that combines queue and -// log semantics. byte slices can be appended and read back in-order. -// The queue maintains a pointer to the current head -// byte slice and can re-read from the head until it has been advanced. -// -// Internally, the queue writes byte slices to multiple segment files so -// that disk space can be reclaimed. When a segment file is larger than -// the max segment size, a new file is created. Segments are removed -// after their head pointer has advanced past the last entry. The first -// segment is the head, and the last segment is the tail. Reads are from -// the head segment and writes tail segment. -// -// queues can have a max size configured such that when the size of all -// segments on disk exceeds the size, write will fail. 
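// Editor's sketch, not part of the original patch: an illustrative walk through
// the append/read/advance cycle the queue doc comment describes. It assumes it
// sits in package hh next to the queue implementation below, since it uses the
// unexported newQueue constructor; the names here are otherwise taken directly
// from that implementation.
package hh

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
)

func queueRoundTrip() error {
	dir, err := ioutil.TempDir("", "hh_queue_sketch")
	if err != nil {
		return err
	}
	defer os.RemoveAll(dir)

	q, err := newQueue(dir, 1024) // cap total on-disk size at 1KB
	if err != nil {
		return err
	}
	if err := q.Open(); err != nil {
		return err
	}
	defer q.Close()

	// Appends always land in the tail segment.
	for _, b := range [][]byte{[]byte("one"), []byte("two")} {
		if err := q.Append(b); err != nil {
			return err
		}
	}

	// Reads come from the head and repeat until Advance moves past them.
	for {
		cur, err := q.Current()
		if err == io.EOF {
			return nil // drained: the head has advanced past the last entry
		} else if err != nil {
			return err
		}
		fmt.Println(string(cur))
		if err := q.Advance(); err != nil {
			return err
		}
	}
}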
-// -// ┌─────┐ -// │Head │ -// ├─────┘ -// │ -// ▼ -// ┌─────────────────┐ ┌─────────────────┐┌─────────────────┐ -// │Segment 1 - 10MB │ │Segment 2 - 10MB ││Segment 3 - 10MB │ -// └─────────────────┘ └─────────────────┘└─────────────────┘ -// ▲ -// │ -// │ -// ┌─────┐ -// │Tail │ -// └─────┘ -type queue struct { - mu sync.RWMutex - - // Directory to create segments - dir string - - // The head and tail segments. Reads are from the beginning of head, - // writes are appended to the tail. - head, tail *segment - - // The maximum size in bytes of a segment file before a new one should be created - maxSegmentSize int64 - - // The maximum size allowed in bytes of all segments before writes will return - // an error - maxSize int64 - - // The segments that exist on disk - segments segments -} -type queuePos struct { - head string - tail string -} - -type segments []*segment - -// newQueue create a queue that will store segments in dir and that will -// consume more than maxSize on disk. -func newQueue(dir string, maxSize int64) (*queue, error) { - return &queue{ - dir: dir, - maxSegmentSize: defaultSegmentSize, - maxSize: maxSize, - segments: segments{}, - }, nil -} - -// Open opens the queue for reading and writing -func (l *queue) Open() error { - l.mu.Lock() - defer l.mu.Unlock() - - segments, err := l.loadSegments() - if err != nil { - return err - } - l.segments = segments - - if len(l.segments) == 0 { - _, err := l.addSegment() - if err != nil { - return err - } - } - - l.head = l.segments[0] - l.tail = l.segments[len(l.segments)-1] - - // If the head has been fully advanced and the segment size is modified, - // existing segments an get stuck and never allow clients to advance further. - // This advances the segment if the current head is already at the end. - _, err = l.head.current() - if err == io.EOF { - return l.trimHead() - } - - return nil -} - -// Close stops the queue for reading and writing -func (l *queue) Close() error { - l.mu.Lock() - defer l.mu.Unlock() - - for _, s := range l.segments { - if err := s.close(); err != nil { - return err - } - } - l.head = nil - l.tail = nil - l.segments = nil - return nil -} - -// Remove removes all underlying file-based resources for the queue. -// It is an error to call this on an open queue. -func (l *queue) Remove() error { - l.mu.Lock() - defer l.mu.Unlock() - - if l.head != nil || l.tail != nil || l.segments != nil { - return fmt.Errorf("queue is open") - } - - return os.RemoveAll(l.dir) -} - -// SetMaxSegmentSize updates the max segment size for new and existing -// segments. -func (l *queue) SetMaxSegmentSize(size int64) error { - l.mu.Lock() - defer l.mu.Unlock() - - l.maxSegmentSize = size - - for _, s := range l.segments { - s.SetMaxSegmentSize(size) - } - - if l.tail.diskUsage() >= l.maxSegmentSize { - segment, err := l.addSegment() - if err != nil { - return err - } - l.tail = segment - } - return nil -} - -func (l *queue) PurgeOlderThan(when time.Time) error { - l.mu.Lock() - defer l.mu.Unlock() - - if len(l.segments) == 0 { - return nil - } - - cutoff := when.Truncate(time.Second) - for { - mod, err := l.head.lastModified() - if err != nil { - return err - } - - if mod.After(cutoff) || mod.Equal(cutoff) { - return nil - } - - // If this is the last segment, first append a new one allowing - // trimming to proceed. 
- if len(l.segments) == 1 { - _, err := l.addSegment() - if err != nil { - return err - } - } - - if err := l.trimHead(); err != nil { - return err - } - } -} - -// LastModified returns the last time the queue was modified. -func (l *queue) LastModified() (time.Time, error) { - l.mu.RLock() - defer l.mu.RUnlock() - - if l.tail != nil { - return l.tail.lastModified() - } - return time.Time{}.UTC(), nil -} - -func (l *queue) Position() (*queuePos, error) { - l.mu.RLock() - defer l.mu.RUnlock() - - qp := &queuePos{} - if l.head != nil { - qp.head = fmt.Sprintf("%s:%d", l.head.path, l.head.pos) - } - if l.tail != nil { - qp.tail = fmt.Sprintf("%s:%d", l.tail.path, l.tail.filePos()) - } - return qp, nil -} - -// diskUsage returns the total size on disk used by the queue -func (l *queue) diskUsage() int64 { - var size int64 - for _, s := range l.segments { - size += s.diskUsage() - } - return size -} - -// addSegment creates a new empty segment file -func (l *queue) addSegment() (*segment, error) { - nextID, err := l.nextSegmentID() - if err != nil { - return nil, err - } - - segment, err := newSegment(filepath.Join(l.dir, strconv.FormatUint(nextID, 10)), l.maxSegmentSize) - if err != nil { - return nil, err - } - - l.segments = append(l.segments, segment) - return segment, nil -} - -// loadSegments loads all segments on disk -func (l *queue) loadSegments() (segments, error) { - segments := []*segment{} - - files, err := ioutil.ReadDir(l.dir) - if err != nil { - return segments, err - } - - for _, segment := range files { - // Segments should be files. Skip anything that is not a dir. - if segment.IsDir() { - continue - } - - // Segments file names are all numeric - _, err := strconv.ParseUint(segment.Name(), 10, 64) - if err != nil { - continue - } - - segment, err := newSegment(filepath.Join(l.dir, segment.Name()), l.maxSegmentSize) - if err != nil { - return segments, err - } - - segments = append(segments, segment) - } - return segments, nil -} - -// nextSegmentID returns the next segment ID that is free -func (l *queue) nextSegmentID() (uint64, error) { - segments, err := ioutil.ReadDir(l.dir) - if err != nil { - return 0, err - } - - var maxID uint64 - for _, segment := range segments { - // Segments should be files. Skip anything that is not a dir. 
- if segment.IsDir() { - continue - } - - // Segments file names are all numeric - segmentID, err := strconv.ParseUint(segment.Name(), 10, 64) - if err != nil { - continue - } - - if segmentID > maxID { - maxID = segmentID - } - } - - return maxID + 1, nil -} - -// Append appends a byte slice to the end of the queue -func (l *queue) Append(b []byte) error { - l.mu.Lock() - defer l.mu.Unlock() - - if l.tail == nil { - return ErrNotOpen - } - - if l.diskUsage()+int64(len(b)) > l.maxSize { - return ErrQueueFull - } - - // Append the entry to the tail, if the segment is full, - // try to create new segment and retry the append - if err := l.tail.append(b); err == ErrSegmentFull { - segment, err := l.addSegment() - if err != nil { - return err - } - l.tail = segment - return l.tail.append(b) - } - return nil -} - -// Current returns the current byte slice at the head of the queue -func (l *queue) Current() ([]byte, error) { - if l.head == nil { - return nil, ErrNotOpen - } - - return l.head.current() -} - -// Advance moves the head point to the next byte slice in the queue -func (l *queue) Advance() error { - l.mu.Lock() - defer l.mu.Unlock() - if l.head == nil { - return ErrNotOpen - } - - err := l.head.advance() - if err == io.EOF { - if err := l.trimHead(); err != nil { - return err - } - } - - return nil -} - -func (l *queue) trimHead() error { - if len(l.segments) > 1 { - l.segments = l.segments[1:] - - if err := l.head.close(); err != nil { - return err - } - if err := os.Remove(l.head.path); err != nil { - return err - } - l.head = l.segments[0] - } - return nil -} - -// Segment is a queue using a single file. The structure of a segment is a series -// lengths + block with a single footer point to the position in the segment of the -// current head block. -// -// ┌──────────────────────────┐ ┌──────────────────────────┐ ┌────────────┐ -// │ Block 1 │ │ Block 2 │ │ Footer │ -// └──────────────────────────┘ └──────────────────────────┘ └────────────┘ -// ┌────────────┐┌────────────┐ ┌────────────┐┌────────────┐ ┌────────────┐ -// │Block 1 Len ││Block 1 Body│ │Block 2 Len ││Block 2 Body│ │Head Offset │ -// │ 8 bytes ││ N bytes │ │ 8 bytes ││ N bytes │ │ 8 bytes │ -// └────────────┘└────────────┘ └────────────┘└────────────┘ └────────────┘ -// -// The footer holds the pointer to the head entry at the end of the segment to allow writes -// to seek to the end and write sequentially (vs having to seek back to the beginning of -// the segment to update the head pointer). Reads must seek to the end then back into the -// segment offset stored in the footer. -// -// Segments store arbitrary byte slices and leave the serialization to the caller. Segments -// are created with a max size and will block writes when the segment is full. 
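// Editor's sketch, not part of the original patch: a self-contained encoder for
// the segment layout described in the comment above -- each block stored as an
// 8-byte big-endian length followed by its body, with an 8-byte footer holding
// the offset of the current head block.
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// encodeSegment frames each block as a length-prefixed record and appends a
// footer pointing at the head block (offset 0 for a fresh segment).
func encodeSegment(blocks [][]byte, headOffset uint64) []byte {
	var buf bytes.Buffer
	for _, b := range blocks {
		var lenBuf [8]byte
		binary.BigEndian.PutUint64(lenBuf[:], uint64(len(b)))
		buf.Write(lenBuf[:])
		buf.Write(b)
	}
	var footer [8]byte
	binary.BigEndian.PutUint64(footer[:], headOffset)
	buf.Write(footer[:])
	return buf.Bytes()
}

func main() {
	seg := encodeSegment([][]byte{[]byte("one"), []byte("two")}, 0)
	// Each block costs 8 bytes of header plus its body; the footer adds 8 more:
	// (8+3) + (8+3) + 8 = 30.
	fmt.Println(len(seg))
}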
-type segment struct { - mu sync.RWMutex - - size int64 - file *os.File - path string - - pos int64 - currentSize int64 - maxSize int64 -} - -func newSegment(path string, maxSize int64) (*segment, error) { - f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0600) - if err != nil { - return nil, err - } - - stats, err := os.Stat(path) - if err != nil { - return nil, err - } - - s := &segment{file: f, path: path, size: stats.Size(), maxSize: maxSize} - - if err := s.open(); err != nil { - return nil, err - } - - return s, nil -} - -func (l *segment) open() error { - l.mu.Lock() - defer l.mu.Unlock() - - // If it's a new segment then write the location of the current record in this segment - if l.size == 0 { - l.pos = 0 - l.currentSize = 0 - - if err := l.writeUint64(uint64(l.pos)); err != nil { - return err - } - - if err := l.file.Sync(); err != nil { - return err - } - - l.size = footerSize - - return nil - } - - // Existing segment so read the current position and the size of the current block - if err := l.seekEnd(-footerSize); err != nil { - return err - } - - pos, err := l.readUint64() - if err != nil { - return err - } - l.pos = int64(pos) - - if err := l.seekToCurrent(); err != nil { - return err - } - - // If we're at the end of the segment, don't read the current block size, - // it's 0. - if l.pos < l.size-footerSize { - currentSize, err := l.readUint64() - if err != nil { - return err - } - l.currentSize = int64(currentSize) - } - - return nil -} - -// append adds byte slice to the end of segment -func (l *segment) append(b []byte) error { - l.mu.Lock() - defer l.mu.Unlock() - - if l.file == nil { - return ErrNotOpen - } - - if l.size+int64(len(b)) > l.maxSize { - return ErrSegmentFull - } - - if err := l.seekEnd(-footerSize); err != nil { - return err - } - - if err := l.writeUint64(uint64(len(b))); err != nil { - return err - } - - if err := l.writeBytes(b); err != nil { - return err - } - - if err := l.writeUint64(uint64(l.pos)); err != nil { - return err - } - - if err := l.file.Sync(); err != nil { - return err - } - - if l.currentSize == 0 { - l.currentSize = int64(len(b)) - } - - l.size += int64(len(b)) + 8 // uint64 for slice length - - return nil -} - -// current returns byte slice that the current segment points -func (l *segment) current() ([]byte, error) { - l.mu.Lock() - defer l.mu.Unlock() - - if int64(l.pos) == l.size-8 { - return nil, io.EOF - } - - if err := l.seekToCurrent(); err != nil { - return nil, err - } - - // read the record size - sz, err := l.readUint64() - if err != nil { - return nil, err - } - l.currentSize = int64(sz) - - if int64(sz) > l.maxSize { - return nil, fmt.Errorf("record size out of range: max %d: got %d", l.maxSize, sz) - } - - b := make([]byte, sz) - if err := l.readBytes(b); err != nil { - return nil, err - } - - return b, nil -} - -// advance advances the current value pointer -func (l *segment) advance() error { - l.mu.Lock() - defer l.mu.Unlock() - - if l.file == nil { - return ErrNotOpen - } - - // If we're at the end of the file, can't advance - if int64(l.pos) == l.size-footerSize { - l.currentSize = 0 - return io.EOF - } - - if err := l.seekEnd(-footerSize); err != nil { - return err - } - - pos := l.pos + l.currentSize + 8 - if err := l.writeUint64(uint64(pos)); err != nil { - return err - } - - if err := l.file.Sync(); err != nil { - return err - } - l.pos = pos - - if err := l.seekToCurrent(); err != nil { - return err - } - - sz, err := l.readUint64() - if err != nil { - return err - } - l.currentSize = int64(sz) - - if 
int64(l.pos) == l.size-footerSize { - l.currentSize = 0 - return io.EOF - } - - return nil -} - -func (l *segment) close() error { - l.mu.Lock() - defer l.mu.Unlock() - if err := l.file.Close(); err != nil { - return err - } - l.file = nil - return nil -} - -func (l *segment) lastModified() (time.Time, error) { - l.mu.RLock() - defer l.mu.RUnlock() - - stats, err := os.Stat(l.file.Name()) - if err != nil { - return time.Time{}, err - } - return stats.ModTime().UTC(), nil -} - -func (l *segment) diskUsage() int64 { - l.mu.RLock() - defer l.mu.RUnlock() - return l.size -} - -func (l *segment) SetMaxSegmentSize(size int64) { - l.mu.Lock() - defer l.mu.Unlock() - l.maxSize = size -} - -func (l *segment) seekToCurrent() error { - return l.seek(int64(l.pos)) -} - -func (l *segment) seek(pos int64) error { - n, err := l.file.Seek(pos, os.SEEK_SET) - if err != nil { - return err - } - - if n != pos { - return fmt.Errorf("bad seek. exp %v, got %v", 0, n) - } - - return nil -} - -func (l *segment) seekEnd(pos int64) error { - _, err := l.file.Seek(pos, os.SEEK_END) - if err != nil { - return err - } - - return nil -} - -func (l *segment) filePos() int64 { - n, _ := l.file.Seek(0, os.SEEK_CUR) - return n -} - -func (l *segment) readUint64() (uint64, error) { - b := make([]byte, 8) - if err := l.readBytes(b); err != nil { - return 0, err - } - return binary.BigEndian.Uint64(b), nil -} - -func (l *segment) writeUint64(sz uint64) error { - var buf [8]byte - binary.BigEndian.PutUint64(buf[:], sz) - return l.writeBytes(buf[:]) -} - -func (l *segment) writeBytes(b []byte) error { - n, err := l.file.Write(b) - if err != nil { - return err - } - - if n != len(b) { - return fmt.Errorf("short write. got %d, exp %d", n, len(b)) - } - return nil -} - -func (l *segment) readBytes(b []byte) error { - n, err := l.file.Read(b) - if err != nil { - return err - } - - if n != len(b) { - return fmt.Errorf("bad read. exp %v, got %v", 0, n) - } - return nil -} diff --git a/services/hh/queue_test.go b/services/hh/queue_test.go deleted file mode 100644 index fd9f5b14b64..00000000000 --- a/services/hh/queue_test.go +++ /dev/null @@ -1,327 +0,0 @@ -package hh - -import ( - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "testing" - "time" -) - -func BenchmarkQueueAppend(b *testing.B) { - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - b.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - q, err := newQueue(dir, 1024*1024*1024) - if err != nil { - b.Fatalf("failed to create queue: %v", err) - } - - if err := q.Open(); err != nil { - b.Fatalf("failed to open queue: %v", err) - } - - for i := 0; i < b.N; i++ { - if err := q.Append([]byte(fmt.Sprintf("%d", i))); err != nil { - println(q.diskUsage()) - b.Fatalf("Queue.Append failed: %v", err) - } - } -} - -func TestQueueAppendOne(t *testing.T) { - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - q, err := newQueue(dir, 1024) - if err != nil { - t.Fatalf("failed to create queue: %v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to open queue: %v", err) - } - - if err := q.Append([]byte("test")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - exp := filepath.Join(dir, "1") - stats, err := os.Stat(exp) - if os.IsNotExist(err) { - t.Fatalf("Queue.Append file not exists. 
exp %v to exist", exp) - } - - // 8 byte header ptr + 8 byte record len + record len - if exp := int64(8 + 8 + 4); stats.Size() != exp { - t.Fatalf("Queue.Append file size mismatch. got %v, exp %v", stats.Size(), exp) - } - - cur, err := q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "test"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } -} - -func TestQueueAppendMultiple(t *testing.T) { - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - q, err := newQueue(dir, 1024) - if err != nil { - t.Fatalf("failed to create queue: %v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to open queue: %v", err) - } - - if err := q.Append([]byte("one")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - if err := q.Append([]byte("two")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - for _, exp := range []string{"one", "two"} { - cur, err := q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } - - if err := q.Advance(); err != nil { - t.Fatalf("Queue.Advance failed: %v", err) - } - } -} - -func TestQueueAdvancePastEnd(t *testing.T) { - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - // create the queue - q, err := newQueue(dir, 1024) - if err != nil { - t.Fatalf("failed to create queue: %v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to open queue: %v", err) - } - - // append one entry, should go to the first segment - if err := q.Append([]byte("one")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - // set the segment size low to force a new segment to be created - q.SetMaxSegmentSize(12) - - // Should go into a new segment - if err := q.Append([]byte("two")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - // should read from first segment - cur, err := q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "one"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } - - if err := q.Advance(); err != nil { - t.Fatalf("Queue.Advance failed: %v", err) - } - - // ensure the first segment file is removed since we've advanced past the end - _, err = os.Stat(filepath.Join(dir, "1")) - if !os.IsNotExist(err) { - t.Fatalf("Queue.Advance should have removed the segment") - } - - // should read from second segment - cur, err = q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "two"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } - - _, err = os.Stat(filepath.Join(dir, "2")) - if os.IsNotExist(err) { - t.Fatalf("Queue.Advance should have removed the segment") - } - - if err := q.Advance(); err != nil { - t.Fatalf("Queue.Advance failed: %v", err) - } - - cur, err = q.Current() - if err != io.EOF { - t.Fatalf("Queue.Current should have returned error") - } -} - -func TestQueueFull(t *testing.T) { - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - // create the queue - q, err := newQueue(dir, 10) - if err != nil { - t.Fatalf("failed to create queue: 
%v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to open queue: %v", err) - } - - if err := q.Append([]byte("one")); err != ErrQueueFull { - t.Fatalf("Queue.Append expected to return queue full") - } -} - -func TestQueueReopen(t *testing.T) { - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - // create the queue - q, err := newQueue(dir, 1024) - if err != nil { - t.Fatalf("failed to create queue: %v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to open queue: %v", err) - } - - if err := q.Append([]byte("one")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - cur, err := q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "one"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } - - // close and re-open the queue - if err := q.Close(); err != nil { - t.Fatalf("Queue.Close failed: %v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to re-open queue: %v", err) - } - - // Make sure we can read back the last current value - cur, err = q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "one"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } - - if err := q.Append([]byte("two")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - if err := q.Advance(); err != nil { - t.Fatalf("Queue.Advance failed: %v", err) - } - - cur, err = q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "two"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } -} - -func TestPurgeQueue(t *testing.T) { - if testing.Short() { - t.Skip("Skipping purge queue") - } - - dir, err := ioutil.TempDir("", "hh_queue") - if err != nil { - t.Fatalf("failed to create temp dir: %v", err) - } - defer os.RemoveAll(dir) - - // create the queue - q, err := newQueue(dir, 1024) - if err != nil { - t.Fatalf("failed to create queue: %v", err) - } - - if err := q.Open(); err != nil { - t.Fatalf("failed to open queue: %v", err) - } - - if err := q.Append([]byte("one")); err != nil { - t.Fatalf("Queue.Append failed: %v", err) - } - - cur, err := q.Current() - if err != nil { - t.Fatalf("Queue.Current failed: %v", err) - } - - if exp := "one"; string(cur) != exp { - t.Errorf("Queue.Current mismatch: got %v, exp %v", string(cur), exp) - } - - time.Sleep(time.Second) - - if err := q.PurgeOlderThan(time.Now()); err != nil { - t.Errorf("Queue.PurgeOlderThan failed: %v", err) - } - - _, err = q.Current() - if err != io.EOF { - t.Fatalf("Queue.Current expected io.EOF, got: %v", err) - } - -} diff --git a/services/hh/service.go b/services/hh/service.go deleted file mode 100644 index f531b79ea62..00000000000 --- a/services/hh/service.go +++ /dev/null @@ -1,275 +0,0 @@ -package hh // import "github.com/influxdata/influxdb/services/hh" - -import ( - "expvar" - "fmt" - "io/ioutil" - "log" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - "time" - - "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/models" - "github.com/influxdata/influxdb/monitor/diagnostics" - "github.com/influxdata/influxdb/services/meta" -) - -// ErrHintedHandoffDisabled is returned when attempting to use a -// disabled hinted handoff service. 
-var ErrHintedHandoffDisabled = fmt.Errorf("hinted handoff disabled") - -const ( - writeShardReq = "writeShardReq" - writeShardReqPoints = "writeShardReqPoints" - writeNodeReq = "writeNodeReq" - writeNodeReqFail = "writeNodeReqFail" - writeNodeReqPoints = "writeNodeReqPoints" -) - -// Service represents a hinted handoff service. -type Service struct { - mu sync.RWMutex - wg sync.WaitGroup - closing chan struct{} - - processors map[uint64]*NodeProcessor - - statMap *expvar.Map - Logger *log.Logger - cfg Config - - shardWriter shardWriter - MetaClient metaClient - - Monitor interface { - RegisterDiagnosticsClient(name string, client diagnostics.Client) - DeregisterDiagnosticsClient(name string) - } -} - -type shardWriter interface { - WriteShard(shardID, ownerID uint64, points []models.Point) error -} - -type metaClient interface { - DataNode(id uint64) (ni *meta.NodeInfo, err error) -} - -// NewService returns a new instance of Service. -func NewService(c Config, w shardWriter, m metaClient) *Service { - key := strings.Join([]string{"hh", c.Dir}, ":") - tags := map[string]string{"path": c.Dir} - - return &Service{ - cfg: c, - closing: make(chan struct{}), - processors: make(map[uint64]*NodeProcessor), - statMap: influxdb.NewStatistics(key, "hh", tags), - Logger: log.New(os.Stderr, "[handoff] ", log.LstdFlags), - shardWriter: w, - MetaClient: m, - } -} - -// Open opens the hinted handoff service. -func (s *Service) Open() error { - s.mu.Lock() - defer s.mu.Unlock() - if !s.cfg.Enabled { - // Allow Open to proceed, but don't do anything. - return nil - } - s.Logger.Printf("Starting hinted handoff service") - s.closing = make(chan struct{}) - - // Register diagnostics if a Monitor service is available. - if s.Monitor != nil { - s.Monitor.RegisterDiagnosticsClient("hh", s) - } - - // Create the root directory if it doesn't already exist. - s.Logger.Printf("Using data dir: %v", s.cfg.Dir) - if err := os.MkdirAll(s.cfg.Dir, 0700); err != nil { - return fmt.Errorf("mkdir all: %s", err) - } - - // Create a node processor for each node directory. - files, err := ioutil.ReadDir(s.cfg.Dir) - if err != nil { - return err - } - - for _, file := range files { - nodeID, err := strconv.ParseUint(file.Name(), 10, 64) - if err != nil { - // Not a number? Skip it. - continue - } - - n := NewNodeProcessor(nodeID, s.pathforNode(nodeID), s.shardWriter, s.MetaClient) - if err := n.Open(); err != nil { - return err - } - s.processors[nodeID] = n - } - - s.wg.Add(1) - go s.purgeInactiveProcessors() - - return nil -} - -// Close closes the hinted handoff service. -func (s *Service) Close() error { - s.Logger.Println("shutting down hh service") - s.mu.Lock() - defer s.mu.Unlock() - - for _, p := range s.processors { - if err := p.Close(); err != nil { - return err - } - } - - if s.Monitor != nil { - s.Monitor.DeregisterDiagnosticsClient("hh") - } - - if s.closing != nil { - close(s.closing) - } - s.wg.Wait() - s.closing = nil - - return nil -} - -// SetLogger sets the internal logger to the logger passed in. 
-func (s *Service) SetLogger(l *log.Logger) { - s.Logger = l -} - -// WriteShard queues the points write for shardID to node ownerID to handoff queue -func (s *Service) WriteShard(shardID, ownerID uint64, points []models.Point) error { - if !s.cfg.Enabled { - return ErrHintedHandoffDisabled - } - s.statMap.Add(writeShardReq, 1) - s.statMap.Add(writeShardReqPoints, int64(len(points))) - - s.mu.RLock() - processor, ok := s.processors[ownerID] - s.mu.RUnlock() - if !ok { - if err := func() error { - // Check again under write-lock. - s.mu.Lock() - defer s.mu.Unlock() - - processor, ok = s.processors[ownerID] - if !ok { - processor = NewNodeProcessor(ownerID, s.pathforNode(ownerID), s.shardWriter, s.MetaClient) - if err := processor.Open(); err != nil { - return err - } - s.processors[ownerID] = processor - } - return nil - }(); err != nil { - return err - } - } - - if err := processor.WriteShard(shardID, points); err != nil { - return err - } - - return nil -} - -// Diagnostics returns diagnostic information. -func (s *Service) Diagnostics() (*diagnostics.Diagnostics, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - d := &diagnostics.Diagnostics{ - Columns: []string{"node", "active", "last modified", "head", "tail"}, - Rows: make([][]interface{}, 0, len(s.processors)), - } - - for k, v := range s.processors { - lm, err := v.LastModified() - if err != nil { - return nil, err - } - - active, err := v.Active() - if err != nil { - return nil, err - } - - d.Rows = append(d.Rows, []interface{}{k, active, lm, v.Head(), v.Tail()}) - } - return d, nil -} - -// purgeInactiveProcessors will cause the service to remove processors for inactive nodes. -func (s *Service) purgeInactiveProcessors() { - defer s.wg.Done() - ticker := time.NewTicker(time.Duration(s.cfg.PurgeInterval)) - defer ticker.Stop() - - for { - select { - case <-s.closing: - return - case <-ticker.C: - func() { - s.mu.Lock() - defer s.mu.Unlock() - - for k, v := range s.processors { - lm, err := v.LastModified() - if err != nil { - s.Logger.Printf("failed to determine LastModified for processor %d: %s", k, err.Error()) - continue - } - - active, err := v.Active() - if err != nil { - s.Logger.Printf("failed to determine if node %d is active: %s", k, err.Error()) - continue - } - if active { - // Node is active. - continue - } - - if !lm.Before(time.Now().Add(-time.Duration(s.cfg.MaxAge))) { - // Node processor contains too-young data. - continue - } - - if err := v.Close(); err != nil { - s.Logger.Printf("failed to close node processor %d: %s", k, err.Error()) - continue - } - if err := v.Purge(); err != nil { - s.Logger.Printf("failed to purge node processor %d: %s", k, err.Error()) - continue - } - delete(s.processors, k) - } - }() - } - } -} - -// pathforNode returns the directory for HH data, for the given node. 
-func (s *Service) pathforNode(nodeID uint64) string { - return filepath.Join(s.cfg.Dir, fmt.Sprintf("%d", nodeID)) -} diff --git a/services/httpd/handler.go b/services/httpd/handler.go index a9123af0b73..829248b3220 100644 --- a/services/httpd/handler.go +++ b/services/httpd/handler.go @@ -61,7 +61,6 @@ type Handler struct { Database(name string) (*meta.DatabaseInfo, error) Authenticate(username, password string) (ui *meta.UserInfo, err error) Users() []meta.UserInfo - Ping(checkAllMetaServers bool) error } QueryAuthorizer interface { @@ -120,14 +119,6 @@ func NewHandler(requireAuthentication, loggingEnabled, writeTrace, JSONWriteEnab "ping-head", "HEAD", "/ping", true, true, h.servePing, }, - route{ // Ping w/ status - "status", - "GET", "/status", true, true, h.serveStatus, - }, - route{ // Ping w/ status - "status-head", - "HEAD", "/status", true, true, h.serveStatus, - }, route{ // Tell data node to run CQs that should be run "process_continuous_queries", "POST", "/data/process_continuous_queries", false, false, h.serveProcessContinuousQueries, @@ -464,10 +455,9 @@ func (h *Handler) serveWriteJSON(w http.ResponseWriter, r *http.Request, body [] // Convert the json batch struct to a points writer struct if err := h.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: bp.Database, - RetentionPolicy: bp.RetentionPolicy, - ConsistencyLevel: cluster.ConsistencyLevelOne, - Points: points, + Database: bp.Database, + RetentionPolicy: bp.RetentionPolicy, + Points: points, }); err != nil { h.statMap.Add(statPointsWrittenFail, int64(len(points))) if influxdb.IsClientError(err) { @@ -543,25 +533,11 @@ func (h *Handler) serveWriteLine(w http.ResponseWriter, r *http.Request, body [] return } - // Determine required consistency level. - consistency := cluster.ConsistencyLevelOne - switch r.Form.Get("consistency") { - case "all": - consistency = cluster.ConsistencyLevelAll - case "any": - consistency = cluster.ConsistencyLevelAny - case "one": - consistency = cluster.ConsistencyLevelOne - case "quorum": - consistency = cluster.ConsistencyLevelQuorum - } - // Write points. if err := h.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: database, - RetentionPolicy: r.FormValue("rp"), - ConsistencyLevel: consistency, - Points: points, + Database: database, + RetentionPolicy: r.FormValue("rp"), + Points: points, }); influxdb.IsClientError(err) { h.statMap.Add(statPointsWrittenFail, int64(len(points))) resultError(w, influxql.Result{Err: err}, http.StatusBadRequest) @@ -594,18 +570,6 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } -// serveStatus returns a simple response to let the client know the whole cluster is running. -func (h *Handler) serveStatus(w http.ResponseWriter, r *http.Request) { - h.statMap.Add(statStatusRequest, 1) - - if err := h.MetaClient.Ping(false); err != nil { - w.WriteHeader(http.StatusServiceUnavailable) - return - } - - w.WriteHeader(http.StatusNoContent) -} - // convertToEpoch converts result timestamps from time.Time to the specified epoch. func convertToEpoch(r *influxql.Result, epoch string) { divisor := int64(1) diff --git a/services/httpd/handler_test.go b/services/httpd/handler_test.go index c1ca1fa130d..a1dafc56ff7 100644 --- a/services/httpd/handler_test.go +++ b/services/httpd/handler_test.go @@ -340,20 +340,6 @@ func TestHandler_Version(t *testing.T) { } } -// Ensure the handler handles status requests correctly. 
-func TestHandler_Status(t *testing.T) { - h := NewHandler(false) - w := httptest.NewRecorder() - h.ServeHTTP(w, MustNewRequest("GET", "/status", nil)) - if w.Code != http.StatusNoContent { - t.Fatalf("unexpected status: %d", w.Code) - } - h.ServeHTTP(w, MustNewRequest("HEAD", "/status", nil)) - if w.Code != http.StatusNoContent { - t.Fatalf("unexpected status: %d", w.Code) - } -} - // Ensure write endpoint can handle bad requests func TestHandler_HandleBadRequestBody(t *testing.T) { b := bytes.NewReader(make([]byte, 10)) diff --git a/services/meta/client.go b/services/meta/client.go index c714bc67e50..80937bc1ad7 100644 --- a/services/meta/client.go +++ b/services/meta/client.go @@ -4,25 +4,22 @@ import ( "bytes" crand "crypto/rand" "crypto/sha256" - "encoding/json" "errors" "fmt" "io" "io/ioutil" "log" - "math" "math/rand" "net/http" "os" + "path/filepath" "sort" "sync" "time" "github.com/influxdata/influxdb" "github.com/influxdata/influxdb/influxql" - "github.com/influxdata/influxdb/services/meta/internal" - "github.com/gogo/protobuf/proto" "golang.org/x/crypto/bcrypt" ) @@ -37,6 +34,8 @@ const ( // SaltBytes is the number of bytes used for salts SaltBytes = 32 + + metaFile = "meta.db" ) var ( @@ -50,18 +49,19 @@ var ( // Client is used to execute commands on and read data from // a meta service cluster. type Client struct { - tls bool logger *log.Logger - nodeID uint64 - mu sync.RWMutex - metaServers []string - changed chan struct{} - closing chan struct{} - cacheData *Data + mu sync.RWMutex + closing chan struct{} + changed chan struct{} + cacheData *Data // Authentication cache. authCache map[string]authUser + + path string + + retentionAutoCreate bool } type authUser struct { @@ -71,21 +71,36 @@ type authUser struct { } // NewClient returns a new *Client. -func NewClient() *Client { +func NewClient(config *Config) *Client { return &Client{ - cacheData: &Data{}, - logger: log.New(os.Stderr, "[metaclient] ", log.LstdFlags), - authCache: make(map[string]authUser, 0), + cacheData: &Data{ + ClusterID: uint64(uint64(rand.Int63())), + Index: 1, + }, + closing: make(chan struct{}), + changed: make(chan struct{}), + logger: log.New(os.Stderr, "[metaclient] ", log.LstdFlags), + authCache: make(map[string]authUser, 0), + path: config.Dir, + retentionAutoCreate: config.RetentionAutoCreate, } } // Open a connection to a meta service cluster. func (c *Client) Open() error { - c.changed = make(chan struct{}) - c.closing = make(chan struct{}) - c.cacheData = c.retryUntilSnapshot(0) + c.mu.Lock() + defer c.mu.Unlock() + // Try to load from disk + if err := c.Load(); err != nil { + return err + } - go c.pollForUpdates() + // If this is a brand new instance, persist to disk immediatly. + if c.cacheData.Index == 1 { + if err := c.Snapshot(); err != nil { + return err + } + } return nil } @@ -109,114 +124,21 @@ func (c *Client) Close() error { return nil } -// SetMetaServers updates the meta servers on the client. -func (c *Client) SetMetaServers(a []string) { - c.mu.Lock() - defer c.mu.Unlock() - c.metaServers = a -} - -// SetTLS sets whether the client should use TLS when connecting. -// This function is not safe for concurrent use. -func (c *Client) SetTLS(v bool) { c.tls = v } - -// Ping will hit the ping endpoint for the metaservice and return nil if -// it returns 200. 
If checkAllMetaServers is set to true, it will hit the -// ping endpoint and tell it to verify the health of all metaservers in the -// cluster -func (c *Client) Ping(checkAllMetaServers bool) error { - c.mu.RLock() - server := c.metaServers[0] - c.mu.RUnlock() - url := c.url(server) + "/ping" - if checkAllMetaServers { - url = url + "?all=true" - } - - resp, err := http.Get(url) - if err != nil { - return err - } - defer resp.Body.Close() - - if resp.StatusCode == http.StatusOK { - return nil - } - - b, err := ioutil.ReadAll(resp.Body) - if err != nil { - return err - } - return fmt.Errorf(string(b)) -} - // AcquireLease attempts to acquire the specified lease. -// A lease is a logical concept that can be used by anything that needs to limit -// execution to a single node. E.g., the CQ service on all nodes may ask for -// the "ContinuousQuery" lease. Only the node that acquires it will run CQs. -// NOTE: Leases are not managed through the CP system and are not fully -// consistent. Any actions taken after acquiring a lease must be idempotent. -func (c *Client) AcquireLease(name string) (l *Lease, err error) { - for n := 1; n < 11; n++ { - if l, err = c.acquireLease(name); err == ErrServiceUnavailable || err == ErrService { - // exponential backoff - d := time.Duration(math.Pow(10, float64(n))) * time.Millisecond - time.Sleep(d) - continue - } - break - } - return -} - -func (c *Client) acquireLease(name string) (*Lease, error) { - c.mu.RLock() - server := c.metaServers[0] - c.mu.RUnlock() - url := fmt.Sprintf("%s/lease?name=%s&nodeid=%d", c.url(server), name, c.nodeID) - - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - switch resp.StatusCode { - case http.StatusOK: - case http.StatusConflict: - err = errors.New("another node owns the lease") - case http.StatusServiceUnavailable: - return nil, ErrServiceUnavailable - case http.StatusBadRequest: - b, e := ioutil.ReadAll(resp.Body) - if e != nil { - return nil, e - } - return nil, fmt.Errorf("meta service: %s", string(b)) - case http.StatusInternalServerError: - return nil, errors.New("meta service internal error") - default: - return nil, errors.New("unrecognized meta service error") - } - - // Read lease JSON from response body. - b, e := ioutil.ReadAll(resp.Body) - if e != nil { - return nil, e +// TODO corylanou remove this for single node +func (c *Client) AcquireLease(name string) (*Lease, error) { + l := Lease{ + Name: name, + Expiration: time.Now().Add(DefaultLeaseDuration), } - // Unmarshal JSON into a Lease. - l := &Lease{} - if e = json.Unmarshal(b, l); e != nil { - return nil, e - } - - return l, err + return &l, nil } func (c *Client) data() *Data { c.mu.RLock() defer c.mu.RUnlock() - return c.cacheData + data := c.cacheData.Clone() + return data } // ClusterID returns the ID of the cluster it's connected to. @@ -227,106 +149,28 @@ func (c *Client) ClusterID() uint64 { return c.cacheData.ClusterID } -// Node returns a node by id. -func (c *Client) DataNode(id uint64) (*NodeInfo, error) { - for _, n := range c.data().DataNodes { - if n.ID == id { - return &n, nil - } - } - return nil, ErrNodeNotFound -} - -// DataNodes returns the data nodes' info. 
-func (c *Client) DataNodes() ([]NodeInfo, error) { - return c.data().DataNodes, nil -} - -// CreateDataNode will create a new data node in the metastore -func (c *Client) CreateDataNode(httpAddr, tcpAddr string) (*NodeInfo, error) { - cmd := &internal.CreateDataNodeCommand{ - HTTPAddr: proto.String(httpAddr), - TCPAddr: proto.String(tcpAddr), - } - - if err := c.retryUntilExec(internal.Command_CreateDataNodeCommand, internal.E_CreateDataNodeCommand_Command, cmd); err != nil { - return nil, err - } - - n, err := c.DataNodeByTCPHost(tcpAddr) - if err != nil { - return nil, err - } - - c.nodeID = n.ID - - return n, nil -} - -// DataNodeByHTTPHost returns the data node with the give http bind address -func (c *Client) DataNodeByHTTPHost(httpAddr string) (*NodeInfo, error) { - nodes, _ := c.DataNodes() - for _, n := range nodes { - if n.Host == httpAddr { - return &n, nil - } - } - - return nil, ErrNodeNotFound -} - -// DataNodeByTCPHost returns the data node with the give http bind address -func (c *Client) DataNodeByTCPHost(tcpAddr string) (*NodeInfo, error) { - nodes, _ := c.DataNodes() - for _, n := range nodes { - if n.TCPHost == tcpAddr { - return &n, nil - } - } - - return nil, ErrNodeNotFound -} - -// DeleteDataNode deletes a data node from the cluster. -func (c *Client) DeleteDataNode(id uint64) error { - cmd := &internal.DeleteDataNodeCommand{ - ID: proto.Uint64(id), - } - - return c.retryUntilExec(internal.Command_DeleteDataNodeCommand, internal.E_DeleteDataNodeCommand_Command, cmd) -} - -// MetaNodes returns the meta nodes' info. -func (c *Client) MetaNodes() ([]NodeInfo, error) { - return c.data().MetaNodes, nil -} - -// MetaNodeByAddr returns the meta node's info. -func (c *Client) MetaNodeByAddr(addr string) *NodeInfo { - for _, n := range c.data().MetaNodes { - if n.Host == addr { - return &n - } - } - return nil -} - // Database returns info for the requested database. func (c *Client) Database(name string) (*DatabaseInfo, error) { - for _, d := range c.data().Databases { + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + for _, d := range data.Databases { if d.Name == name { return &d, nil } } - // Can't throw ErrDatabaseNotExists here since it would require some major - // work around catching the error when needed. Should be revisited. - return nil, nil + return nil, influxdb.ErrDatabaseNotFound(name) } // Databases returns a list of all database infos. 
func (c *Client) Databases() ([]DatabaseInfo, error) { - dbs := c.data().Databases + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + dbs := data.Databases if dbs == nil { return []DatabaseInfo{}, nil } @@ -335,29 +179,50 @@ func (c *Client) Databases() ([]DatabaseInfo, error) { // CreateDatabase creates a database or returns it if it already exists func (c *Client) CreateDatabase(name string) (*DatabaseInfo, error) { - if db, _ := c.Database(name); db != nil { + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if db := data.Database(name); db != nil { return db, nil } - cmd := &internal.CreateDatabaseCommand{ - Name: proto.String(name), + if err := data.CreateDatabase(name); err != nil { + return nil, err } - err := c.retryUntilExec(internal.Command_CreateDatabaseCommand, internal.E_CreateDatabaseCommand_Command, cmd) - if err != nil { - return nil, err + // create default retention policy + if c.retentionAutoCreate { + if err := data.CreateRetentionPolicy(name, &RetentionPolicyInfo{ + Name: "default", + ReplicaN: 1, + }); err != nil { + return nil, err + } + if err := data.SetDefaultRetentionPolicy(name, "default"); err != nil { + return nil, err + } } - return c.Database(name) + db := data.Database(name) + + c.commit(data) + return db, nil } // CreateDatabaseWithRetentionPolicy creates a database with the specified retention policy. func (c *Client) CreateDatabaseWithRetentionPolicy(name string, rpi *RetentionPolicyInfo) (*DatabaseInfo, error) { + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + if rpi.Duration < MinRetentionPolicyDuration && rpi.Duration != 0 { return nil, ErrRetentionPolicyDurationTooLow } - if db, _ := c.Database(name); db != nil { + if db := data.Database(name); db != nil { // Check if the retention policy already exists. If it does and matches // the desired retention policy, exit with no error. if rp := db.RetentionPolicy(rpi.Name); rp != nil { @@ -368,31 +233,47 @@ func (c *Client) CreateDatabaseWithRetentionPolicy(name string, rpi *RetentionPo } } - cmd := &internal.CreateDatabaseCommand{ - Name: proto.String(name), - RetentionPolicy: rpi.marshal(), + if err := data.CreateDatabase(name); err != nil { + return nil, err } - err := c.retryUntilExec(internal.Command_CreateDatabaseCommand, internal.E_CreateDatabaseCommand_Command, cmd) - if err != nil { + if err := data.CreateRetentionPolicy(name, rpi); err != nil { return nil, err } - return c.Database(name) + if err := data.SetDefaultRetentionPolicy(name, rpi.Name); err != nil { + return nil, err + } + + db := data.Database(name) + + c.commit(data) + return db, nil } // DropDatabase deletes a database. func (c *Client) DropDatabase(name string) error { - cmd := &internal.DropDatabaseCommand{ - Name: proto.String(name), + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.DropDatabase(name); err != nil { + return err } - return c.retryUntilExec(internal.Command_DropDatabaseCommand, internal.E_DropDatabaseCommand_Command, cmd) + c.commit(data) + return nil } // CreateRetentionPolicy creates a retention policy on the specified database. 
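// Editor's sketch, not part of the original patch: the rewritten client methods
// in this file all follow the same clone-mutate-commit shape. withData is a
// hypothetical helper that names that pattern, assuming it lives in package
// meta next to the Client above.
func (c *Client) withData(mutate func(*Data) error) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Work on a copy so concurrent readers never observe a half-applied change.
	data := c.cacheData.Clone()
	if err := mutate(data); err != nil {
		return err
	}

	// Swap the modified copy in and notify watchers of the new generation.
	c.commit(data)
	return nil
}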
func (c *Client) CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo) (*RetentionPolicyInfo, error) { - if rp, _ := c.RetentionPolicy(database, rpi.Name); rp != nil { + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if rp, _ := data.RetentionPolicy(database, rpi.Name); rp != nil { return rp, nil } @@ -400,26 +281,26 @@ func (c *Client) CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo return nil, ErrRetentionPolicyDurationTooLow } - cmd := &internal.CreateRetentionPolicyCommand{ - Database: proto.String(database), - RetentionPolicy: rpi.marshal(), + if err := data.CreateRetentionPolicy(database, rpi); err != nil { + return nil, err } - if err := c.retryUntilExec(internal.Command_CreateRetentionPolicyCommand, internal.E_CreateRetentionPolicyCommand_Command, cmd); err != nil { + rp, err := data.RetentionPolicy(database, rpi.Name) + if err != nil { return nil, err } - return c.RetentionPolicy(database, rpi.Name) + c.commit(data) + return rp, nil } // RetentionPolicy returns the requested retention policy info. func (c *Client) RetentionPolicy(database, name string) (rpi *RetentionPolicyInfo, err error) { - db, err := c.Database(database) - if err != nil { - return nil, err - } + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() - // TODO: This should not be handled here + db := data.Database(database) if db == nil { return nil, influxdb.ErrDatabaseNotFound(database) } @@ -429,61 +310,55 @@ func (c *Client) RetentionPolicy(database, name string) (rpi *RetentionPolicyInf // DropRetentionPolicy drops a retention policy from a database. func (c *Client) DropRetentionPolicy(database, name string) error { - cmd := &internal.DropRetentionPolicyCommand{ - Database: proto.String(database), - Name: proto.String(name), + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.DropRetentionPolicy(database, name); err != nil { + return err } - return c.retryUntilExec(internal.Command_DropRetentionPolicyCommand, internal.E_DropRetentionPolicyCommand_Command, cmd) + c.commit(data) + return nil } // SetDefaultRetentionPolicy sets a database's default retention policy. func (c *Client) SetDefaultRetentionPolicy(database, name string) error { - cmd := &internal.SetDefaultRetentionPolicyCommand{ - Database: proto.String(database), - Name: proto.String(name), + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.SetDefaultRetentionPolicy(database, name); err != nil { + return err } - return c.retryUntilExec(internal.Command_SetDefaultRetentionPolicyCommand, internal.E_SetDefaultRetentionPolicyCommand_Command, cmd) + c.commit(data) + return nil } // UpdateRetentionPolicy updates a retention policy. 
func (c *Client) UpdateRetentionPolicy(database, name string, rpu *RetentionPolicyUpdate) error { - var newName *string - if rpu.Name != nil { - newName = rpu.Name - } - - var duration *int64 - if rpu.Duration != nil { - value := int64(*rpu.Duration) - duration = &value - } + c.mu.Lock() + defer c.mu.Unlock() - var replicaN *uint32 - if rpu.ReplicaN != nil { - value := uint32(*rpu.ReplicaN) - replicaN = &value - } + data := c.cacheData.Clone() - cmd := &internal.UpdateRetentionPolicyCommand{ - Database: proto.String(database), - Name: proto.String(name), - NewName: newName, - Duration: duration, - ReplicaN: replicaN, + if err := data.UpdateRetentionPolicy(database, name, rpu); err != nil { + return err } - return c.retryUntilExec(internal.Command_UpdateRetentionPolicyCommand, internal.E_UpdateRetentionPolicyCommand_Command, cmd) -} - -// IsLeader - should get rid of this -func (c *Client) IsLeader() bool { - return false + defer c.commit(data) + return nil } func (c *Client) Users() []UserInfo { - users := c.data().Users + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + users := data.Users if users == nil { return []UserInfo{} @@ -492,7 +367,11 @@ func (c *Client) Users() []UserInfo { } func (c *Client) User(name string) (*UserInfo, error) { - for _, u := range c.data().Users { + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + for _, u := range data.Users { if u.Name == name { return &u, nil } @@ -524,68 +403,97 @@ func (c *Client) saltedHash(password string) (salt, hash []byte, err error) { } func (c *Client) CreateUser(name, password string, admin bool) (*UserInfo, error) { + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + // Hash the password before serializing it. hash, err := bcrypt.GenerateFromPassword([]byte(password), bcryptCost) if err != nil { return nil, err } - if err := c.retryUntilExec(internal.Command_CreateUserCommand, internal.E_CreateUserCommand_Command, - &internal.CreateUserCommand{ - Name: proto.String(name), - Hash: proto.String(string(hash)), - Admin: proto.Bool(admin), - }, - ); err != nil { + if err := data.CreateUser(name, string(hash), admin); err != nil { return nil, err } - return c.User(name) + + u := data.User(name) + + c.commit(data) + return u, nil } func (c *Client) UpdateUser(name, password string) error { + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + // Hash the password before serializing it. 
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcryptCost) if err != nil { return err } - return c.retryUntilExec(internal.Command_UpdateUserCommand, internal.E_UpdateUserCommand_Command, - &internal.UpdateUserCommand{ - Name: proto.String(name), - Hash: proto.String(string(hash)), - }, - ) + if err := data.UpdateUser(name, string(hash)); err != nil { + return nil + } + + delete(c.authCache, name) + + c.commit(data) + return nil } func (c *Client) DropUser(name string) error { - return c.retryUntilExec(internal.Command_DropUserCommand, internal.E_DropUserCommand_Command, - &internal.DropUserCommand{ - Name: proto.String(name), - }, - ) + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.DropUser(name); err != nil { + return err + } + + c.commit(data) + return nil } func (c *Client) SetPrivilege(username, database string, p influxql.Privilege) error { - return c.retryUntilExec(internal.Command_SetPrivilegeCommand, internal.E_SetPrivilegeCommand_Command, - &internal.SetPrivilegeCommand{ - Username: proto.String(username), - Database: proto.String(database), - Privilege: proto.Int32(int32(p)), - }, - ) + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.SetPrivilege(username, database, p); err != nil { + return err + } + + c.commit(data) + return nil } func (c *Client) SetAdminPrivilege(username string, admin bool) error { - return c.retryUntilExec(internal.Command_SetAdminPrivilegeCommand, internal.E_SetAdminPrivilegeCommand_Command, - &internal.SetAdminPrivilegeCommand{ - Username: proto.String(username), - Admin: proto.Bool(admin), - }, - ) + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.SetAdminPrivilege(username, admin); err != nil { + return err + } + + c.commit(data) + return nil } func (c *Client) UserPrivileges(username string) (map[string]influxql.Privilege, error) { - p, err := c.data().UserPrivileges(username) + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + p, err := data.UserPrivileges(username) if err != nil { return nil, err } @@ -593,7 +501,11 @@ func (c *Client) UserPrivileges(username string) (map[string]influxql.Privilege, } func (c *Client) UserPrivilege(username, database string) (*influxql.Privilege, error) { - p, err := c.data().UserPrivilege(username, database) + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + p, err := data.UserPrivilege(username, database) if err != nil { return nil, err } @@ -601,7 +513,11 @@ func (c *Client) UserPrivilege(username, database string) (*influxql.Privilege, } func (c *Client) AdminUserExists() bool { - for _, u := range c.data().Users { + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + for _, u := range data.Users { if u.Admin { return true } @@ -610,11 +526,13 @@ func (c *Client) AdminUserExists() bool { } func (c *Client) Authenticate(username, password string) (*UserInfo, error) { - c.mu.Lock() - defer c.mu.Unlock() + c.mu.RLock() + defer c.mu.RUnlock() + + data := c.cacheData.Clone() // Find user. - userInfo := c.cacheData.User(username) + userInfo := data.User(username) if userInfo == nil { return nil, ErrUserNotFound } @@ -645,13 +563,19 @@ func (c *Client) Authenticate(username, password string) (*UserInfo, error) { } func (c *Client) UserCount() int { - return len(c.data().Users) + c.mu.RLock() + defer c.mu.RUnlock() + + return len(c.cacheData.Users) } // ShardIDs returns a list of all shard ids. 
func (c *Client) ShardIDs() []uint64 { + c.mu.RLock() + defer c.mu.RUnlock() + var a []uint64 - for _, dbi := range c.data().Databases { + for _, dbi := range c.cacheData.Databases { for _, rpi := range dbi.RetentionPolicies { for _, sgi := range rpi.ShardGroups { for _, si := range sgi.Shards { @@ -667,8 +591,11 @@ func (c *Client) ShardIDs() []uint64 { // ShardGroupsByTimeRange returns a list of all shard groups on a database and policy that may contain data // for the specified time range. Shard groups are sorted by start time. func (c *Client) ShardGroupsByTimeRange(database, policy string, min, max time.Time) (a []ShardGroupInfo, err error) { + c.mu.RLock() + defer c.mu.RUnlock() + // Find retention policy. - rpi, err := c.data().RetentionPolicy(database, policy) + rpi, err := c.cacheData.RetentionPolicy(database, policy) if err != nil { return nil, err } else if rpi == nil { @@ -714,39 +641,53 @@ func (c *Client) ShardsByTimeRange(sources influxql.Sources, tmin, tmax time.Tim // CreateShardGroup creates a shard group on a database and policy for a given timestamp. func (c *Client) CreateShardGroup(database, policy string, timestamp time.Time) (*ShardGroupInfo, error) { - if sg, _ := c.data().ShardGroupByTimestamp(database, policy, timestamp); sg != nil { - return sg, nil + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + sgi, err := createShardGroup(data, database, policy, timestamp) + if err != nil { + return nil, err } - cmd := &internal.CreateShardGroupCommand{ - Database: proto.String(database), - Policy: proto.String(policy), - Timestamp: proto.Int64(timestamp.UnixNano()), + c.commit(data) + return sgi, nil +} + +func createShardGroup(data *Data, database, policy string, timestamp time.Time) (*ShardGroupInfo, error) { + if sg, _ := data.ShardGroupByTimestamp(database, policy, timestamp); sg != nil { + return sg, nil } - if err := c.retryUntilExec(internal.Command_CreateShardGroupCommand, internal.E_CreateShardGroupCommand_Command, cmd); err != nil { + if err := data.CreateShardGroup(database, policy, timestamp); err != nil { return nil, err } - rpi, err := c.RetentionPolicy(database, policy) + rpi, err := data.RetentionPolicy(database, policy) if err != nil { return nil, err } else if rpi == nil { return nil, errors.New("retention policy deleted after shard group created") } - return rpi.ShardGroupByTimestamp(timestamp), nil + sgi := rpi.ShardGroupByTimestamp(timestamp) + return sgi, nil } // DeleteShardGroup removes a shard group from a database and retention policy by id. func (c *Client) DeleteShardGroup(database, policy string, id uint64) error { - cmd := &internal.DeleteShardGroupCommand{ - Database: proto.String(database), - Policy: proto.String(policy), - ShardGroupID: proto.Uint64(id), + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.DeleteShardGroup(database, policy, id); err != nil { + return err } - return c.retryUntilExec(internal.Command_DeleteShardGroupCommand, internal.E_DeleteShardGroupCommand_Command, cmd) + c.commit(data) + return nil } // PrecreateShardGroups creates shard groups whose endtime is before the 'to' time passed in, but @@ -754,7 +695,11 @@ func (c *Client) DeleteShardGroup(database, policy string, id uint64) error { // for the corresponding time range arrives. Shard creation involves Raft consensus, and precreation // avoids taking the hit at write-time. 
func (c *Client) PrecreateShardGroups(from, to time.Time) error { - for _, di := range c.data().Databases { + c.mu.Lock() + defer c.mu.Unlock() + data := c.cacheData.Clone() + + for _, di := range data.Databases { for _, rp := range di.RetentionPolicies { if len(rp.ShardGroups) == 0 { // No data was ever written to this group, or all groups have been deleted. @@ -768,21 +713,26 @@ func (c *Client) PrecreateShardGroups(from, to time.Time) error { // Create successive shard group. nextShardGroupTime := g.EndTime.Add(1 * time.Nanosecond) - if newGroup, err := c.CreateShardGroup(di.Name, rp.Name, nextShardGroupTime); err != nil { + if newGroup, err := createShardGroup(data, di.Name, rp.Name, nextShardGroupTime); err != nil { c.logger.Printf("failed to precreate successive shard group for group %d: %s", g.ID, err.Error()) } else { c.logger.Printf("new shard group %d successfully precreated for database %s, retention policy %s", newGroup.ID, di.Name, rp.Name) } } } - return nil } + + c.commit(data) return nil } // ShardOwner returns the owning shard group info for a specific shard. func (c *Client) ShardOwner(shardID uint64) (database, policy string, sgi *ShardGroupInfo) { - for _, dbi := range c.data().Databases { + c.mu.RLock() + data := c.cacheData.Clone() + c.mu.RUnlock() + + for _, dbi := range data.Databases { for _, rpi := range dbi.RetentionPolicies { for _, g := range rpi.ShardGroups { if g.Deleted() { @@ -803,147 +753,76 @@ func (c *Client) ShardOwner(shardID uint64) (database, policy string, sgi *Shard return } -// JoinMetaServer will add the passed in tcpAddr to the raft peers and add a MetaNode to -// the metastore -func (c *Client) JoinMetaServer(httpAddr, tcpAddr string) (*NodeInfo, error) { - node := &NodeInfo{ - Host: httpAddr, - TCPHost: tcpAddr, - } - b, err := json.Marshal(node) - if err != nil { - return nil, err - } +func (c *Client) CreateContinuousQuery(database, name, query string) error { + c.mu.Lock() + defer c.mu.Unlock() - currentServer := 0 - redirectServer := "" - for { - // get the server to try to join against - var url string - if redirectServer != "" { - url = redirectServer - redirectServer = "" - } else { - c.mu.RLock() - - if currentServer >= len(c.metaServers) { - // We've tried every server, wait a second before - // trying again - time.Sleep(time.Second) - currentServer = 0 - } - server := c.metaServers[currentServer] - c.mu.RUnlock() + data := c.cacheData.Clone() - url = c.url(server) + "/join" - } + if err := data.CreateContinuousQuery(database, name, query); err != nil { + return err + } - resp, err := http.Post(url, "application/json", bytes.NewBuffer(b)) - if err != nil { - currentServer++ - continue - } + c.commit(data) + return nil +} - // Successfully joined - if resp.StatusCode == http.StatusOK { - defer resp.Body.Close() - if err := json.NewDecoder(resp.Body).Decode(&node); err != nil { - return nil, err - } - break - } - resp.Body.Close() +func (c *Client) DropContinuousQuery(database, name string) error { + c.mu.Lock() + defer c.mu.Unlock() - // We tried to join a meta node that was not the leader, rety at the node - // they think is the leader. 
- if resp.StatusCode == http.StatusTemporaryRedirect { - redirectServer = resp.Header.Get("Location") - continue - } + data := c.cacheData.Clone() - // Something failed, try the next node - currentServer++ + if err := data.DropContinuousQuery(database, name); err != nil { + return nil } - return node, nil + defer c.commit(data) + return nil } -func (c *Client) CreateMetaNode(httpAddr, tcpAddr string) (*NodeInfo, error) { - cmd := &internal.CreateMetaNodeCommand{ - HTTPAddr: proto.String(httpAddr), - TCPAddr: proto.String(tcpAddr), - Rand: proto.Uint64(uint64(rand.Int63())), - } +func (c *Client) CreateSubscription(database, rp, name, mode string, destinations []string) error { + c.mu.Lock() + defer c.mu.Unlock() - if err := c.retryUntilExec(internal.Command_CreateMetaNodeCommand, internal.E_CreateMetaNodeCommand_Command, cmd); err != nil { - return nil, err - } + data := c.cacheData.Clone() - n := c.MetaNodeByAddr(httpAddr) - if n == nil { - return nil, errors.New("new meta node not found") + if err := data.CreateSubscription(database, rp, name, mode, destinations); err != nil { + return err } - c.nodeID = n.ID - - return n, nil + c.commit(data) + return nil } -func (c *Client) DeleteMetaNode(id uint64) error { - cmd := &internal.DeleteMetaNodeCommand{ - ID: proto.Uint64(id), +func (c *Client) DropSubscription(database, rp, name string) error { + c.mu.Lock() + defer c.mu.Unlock() + + data := c.cacheData.Clone() + + if err := data.DropSubscription(database, rp, name); err != nil { + return err } - return c.retryUntilExec(internal.Command_DeleteMetaNodeCommand, internal.E_DeleteMetaNodeCommand_Command, cmd) + c.commit(data) + return nil } -func (c *Client) CreateContinuousQuery(database, name, query string) error { - return c.retryUntilExec(internal.Command_CreateContinuousQueryCommand, internal.E_CreateContinuousQueryCommand_Command, - &internal.CreateContinuousQueryCommand{ - Database: proto.String(database), - Name: proto.String(name), - Query: proto.String(query), - }, - ) -} +func (c *Client) SetData(data *Data) error { + c.mu.Lock() -func (c *Client) DropContinuousQuery(database, name string) error { - return c.retryUntilExec(internal.Command_DropContinuousQueryCommand, internal.E_DropContinuousQueryCommand_Command, - &internal.DropContinuousQueryCommand{ - Database: proto.String(database), - Name: proto.String(name), - }, - ) -} + // reset the index so the commit will fire a change event + c.cacheData.Index = 0 -func (c *Client) CreateSubscription(database, rp, name, mode string, destinations []string) error { - return c.retryUntilExec(internal.Command_CreateSubscriptionCommand, internal.E_CreateSubscriptionCommand_Command, - &internal.CreateSubscriptionCommand{ - Database: proto.String(database), - RetentionPolicy: proto.String(rp), - Name: proto.String(name), - Mode: proto.String(mode), - Destinations: destinations, - }, - ) -} + // increment the index to force the changed channel to fire + d := data.Clone() + d.Index++ + c.commit(d) -func (c *Client) DropSubscription(database, rp, name string) error { - return c.retryUntilExec(internal.Command_DropSubscriptionCommand, internal.E_DropSubscriptionCommand_Command, - &internal.DropSubscriptionCommand{ - Database: proto.String(database), - RetentionPolicy: proto.String(rp), - Name: proto.String(name), - }, - ) -} + c.mu.Unlock() -func (c *Client) SetData(data *Data) error { - return c.retryUntilExec(internal.Command_SetDataCommand, internal.E_SetDataCommand_Command, - &internal.SetDataCommand{ - Data: data.marshal(), - }, - ) + return 
nil } // WaitForDataChanged will return a channel that will get closed when @@ -954,6 +833,15 @@ func (c *Client) WaitForDataChanged() chan struct{} { return c.changed } +// commit assumes it is under a full lock +func (c *Client) commit(data *Data) { + data.Index++ + c.cacheData = data + c.Snapshot() + close(c.changed) + c.changed = make(chan struct{}) +} + func (c *Client) MarshalBinary() ([]byte, error) { c.mu.RLock() defer c.mu.RUnlock() @@ -966,257 +854,6 @@ func (c *Client) SetLogger(l *log.Logger) { c.logger = l } -func (c *Client) index() uint64 { - c.mu.RLock() - defer c.mu.RUnlock() - return c.cacheData.Index -} - -// retryUntilExec will attempt the command on each of the metaservers until it either succeeds or -// hits the max number of tries -func (c *Client) retryUntilExec(typ internal.Command_Type, desc *proto.ExtensionDesc, value interface{}) error { - var err error - var index uint64 - tries := 0 - currentServer := 0 - var redirectServer string - - for { - c.mu.RLock() - // exit if we're closed - select { - case <-c.closing: - c.mu.RUnlock() - return nil - default: - // we're still open, continue on - } - c.mu.RUnlock() - - // build the url to hit the redirect server or the next metaserver - var url string - if redirectServer != "" { - url = redirectServer - redirectServer = "" - } else { - c.mu.RLock() - if currentServer >= len(c.metaServers) { - currentServer = 0 - } - server := c.metaServers[currentServer] - c.mu.RUnlock() - - url = fmt.Sprintf("://%s/execute", server) - if c.tls { - url = "https" + url - } else { - url = "http" + url - } - } - - index, err = c.exec(url, typ, desc, value) - tries++ - currentServer++ - - if err == nil { - c.waitForIndex(index) - return nil - } - - if tries > maxRetries { - return err - } - - if e, ok := err.(errRedirect); ok { - redirectServer = e.host - continue - } - - if _, ok := err.(errCommand); ok { - return err - } - - time.Sleep(errSleep) - } -} - -func (c *Client) exec(url string, typ internal.Command_Type, desc *proto.ExtensionDesc, value interface{}) (index uint64, err error) { - // Create command. 
- cmd := &internal.Command{Type: &typ} - if err := proto.SetExtension(cmd, desc, value); err != nil { - panic(err) - } - - b, err := proto.Marshal(cmd) - if err != nil { - return 0, err - } - - resp, err := http.Post(url, "application/octet-stream", bytes.NewBuffer(b)) - if err != nil { - return 0, err - } - defer resp.Body.Close() - - // read the response - if resp.StatusCode == http.StatusTemporaryRedirect { - return 0, errRedirect{host: resp.Header.Get("Location")} - } else if resp.StatusCode != http.StatusOK { - return 0, fmt.Errorf("meta service returned %s", resp.Status) - } - - res := &internal.Response{} - - b, err = ioutil.ReadAll(resp.Body) - if err != nil { - return 0, err - } - - if err := proto.Unmarshal(b, res); err != nil { - return 0, err - } - es := res.GetError() - if es != "" { - return 0, errCommand{msg: es} - } - - return res.GetIndex(), nil -} - -func (c *Client) waitForIndex(idx uint64) { - for { - c.mu.RLock() - if c.cacheData.Index >= idx { - c.mu.RUnlock() - return - } - ch := c.changed - c.mu.RUnlock() - <-ch - } -} - -func (c *Client) pollForUpdates() { - for { - data := c.retryUntilSnapshot(c.index()) - if data == nil { - // this will only be nil if the client has been closed, - // so we can exit out - return - } - - // update the data and notify of the change - c.mu.Lock() - idx := c.cacheData.Index - c.cacheData = data - c.updateAuthCache() - if idx < data.Index { - close(c.changed) - c.changed = make(chan struct{}) - } - c.mu.Unlock() - } -} - -func (c *Client) getSnapshot(server string, index uint64) (*Data, error) { - resp, err := http.Get(c.url(server) + fmt.Sprintf("?index=%d", index)) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("meta server returned non-200: %s", resp.Status) - } - - b, err := ioutil.ReadAll(resp.Body) - if err != nil { - return nil, err - } - data := &Data{} - if err := data.UnmarshalBinary(b); err != nil { - return nil, err - } - - return data, nil -} - -// peers returns the TCPHost addresses of all the metaservers -func (c *Client) peers() []string { - - var peers Peers - // query each server and keep track of who their peers are - for _, server := range c.metaServers { - url := c.url(server) + "/peers" - resp, err := http.Get(url) - if err != nil { - continue - } - defer resp.Body.Close() - - // This meta-server might not be ready to answer, continue on - if resp.StatusCode != http.StatusOK { - continue - } - - dec := json.NewDecoder(resp.Body) - var p []string - if err := dec.Decode(&p); err != nil { - continue - } - peers = peers.Append(p...) 
- } - - // Return the unique set of peer addresses - return []string(peers.Unique()) -} - -func (c *Client) url(server string) string { - url := fmt.Sprintf("://%s", server) - - if c.tls { - url = "https" + url - } else { - url = "http" + url - } - - return url -} - -func (c *Client) retryUntilSnapshot(idx uint64) *Data { - currentServer := 0 - for { - // get the index to look from and the server to poll - c.mu.RLock() - - // exit if we're closed - select { - case <-c.closing: - c.mu.RUnlock() - return nil - default: - // we're still open, continue on - } - - if currentServer >= len(c.metaServers) { - currentServer = 0 - } - server := c.metaServers[currentServer] - c.mu.RUnlock() - - data, err := c.getSnapshot(server, idx) - - if err == nil { - return data - } - - c.logger.Printf("failure getting snapshot from %s: %s", server, err.Error()) - time.Sleep(errSleep) - - currentServer++ - } -} - func (c *Client) updateAuthCache() { // copy cached user info for still-present users newCache := make(map[string]authUser, len(c.authCache)) @@ -1232,46 +869,57 @@ func (c *Client) updateAuthCache() { c.authCache = newCache } -func (c *Client) MetaServers() []string { - return c.metaServers -} - -type Peers []string +// Snapshot will save the current meta data to disk +func (c *Client) Snapshot() error { + file := filepath.Join(c.path, metaFile) + tmpFile := file + "tmp" -func (peers Peers) Append(p ...string) Peers { - peers = append(peers, p...) + f, err := os.Create(tmpFile) + if err != nil { + return err + } + defer f.Close() - return peers.Unique() -} + var data []byte + if b, err := c.cacheData.MarshalBinary(); err != nil { + return err + } else { + data = b + } -func (peers Peers) Unique() Peers { - distinct := map[string]struct{}{} - for _, p := range peers { - distinct[p] = struct{}{} + if _, err := f.Write(data); err != nil { + return err } - var u Peers - for k := range distinct { - u = append(u, k) + if err = f.Close(); nil != err { + return err } - return u + + return os.Rename(tmpFile, file) } -func (peers Peers) Contains(peer string) bool { - for _, p := range peers { - if p == peer { - return true +// Load will save the current meta data from disk +func (c *Client) Load() error { + file := filepath.Join(c.path, metaFile) + + f, err := os.Open(file) + if err != nil { + if os.IsNotExist(err) { + return nil } + return err } - return false -} + defer f.Close() -type errRedirect struct { - host string -} + data, err := ioutil.ReadAll(f) + if err != nil { + return err + } -func (e errRedirect) Error() string { - return fmt.Sprintf("redirect to %s", e.host) + if err := c.cacheData.UnmarshalBinary(data); err != nil { + return err + } + return nil } type errCommand struct { diff --git a/services/meta/client_test.go b/services/meta/client_test.go new file mode 100644 index 00000000000..14e96478815 --- /dev/null +++ b/services/meta/client_test.go @@ -0,0 +1,776 @@ +package meta_test + +import ( + "encoding/json" + "io/ioutil" + "net" + "os" + "path" + "runtime" + "testing" + "time" + + "github.com/influxdata/influxdb" + + "github.com/influxdata/influxdb/influxql" + "github.com/influxdata/influxdb/services/meta" +) + +func TestMetaClient_CreateDatabaseOnly(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if db, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } else if db.Name != "db0" { + t.Fatalf("database name mismatch. 
exp: db0, got %s", db.Name) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + // Make sure a default retention policy was created. + rp, err := c.RetentionPolicy("db0", "default") + if err != nil { + t.Fatal(err) + } else if rp == nil { + t.Fatal("failed to create rp") + } else if exp, got := "default", rp.Name; exp != got { + t.Fatalf("rp name wrong:\n\texp: %s\n\tgot: %s", exp, got) + } +} + +func TestMetaClient_CreateDatabaseIfNotExists(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } +} + +func TestMetaClient_CreateDatabaseWithRetentionPolicy(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabaseWithRetentionPolicy("db0", &meta.RetentionPolicyInfo{ + Name: "rp0", + Duration: 1 * time.Hour, + ReplicaN: 1, + }); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + rp := db.RetentionPolicy("rp0") + if err != nil { + t.Fatal(err) + } else if rp.Name != "rp0" { + t.Fatalf("rp name wrong: %s", rp.Name) + } else if rp.Duration != time.Hour { + t.Fatalf("rp duration wrong: %s", rp.Duration.String()) + } else if rp.ReplicaN != 1 { + t.Fatalf("rp replication wrong: %d", rp.ReplicaN) + } +} + +func TestMetaClient_Databases(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + // Create two databases. + db, err := c.CreateDatabase("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + db, err = c.CreateDatabase("db1") + if err != nil { + t.Fatal(err) + } else if db.Name != "db1" { + t.Fatalf("db name wrong: %s", db.Name) + } + + dbs, err := c.Databases() + if err != nil { + t.Fatal(err) + } + if len(dbs) != 2 { + t.Fatalf("expected 2 databases but got %d", len(dbs)) + } else if dbs[0].Name != "db0" { + t.Fatalf("db name wrong: %s", dbs[0].Name) + } else if dbs[1].Name != "db1" { + t.Fatalf("db name wrong: %s", dbs[1].Name) + } +} + +func TestMetaClient_DropDatabase(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatalf("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + if err := c.DropDatabase("db0"); err != nil { + t.Fatal(err) + } + + if db, _ = c.Database("db0"); db != nil { + t.Fatalf("expected database to not return: %v", db) + } + + // Dropping a database that does not exist is not an error. 
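+	// ("db foo" was never created above, so this exercises the not-found path.)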
+ if err := c.DropDatabase("db foo"); err != nil { + t.Fatalf("got %v error, but expected no error", err) + } +} + +func TestMetaClient_CreateRetentionPolicy(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + if _, err := c.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{ + Name: "rp0", + Duration: 1 * time.Hour, + ReplicaN: 1, + }); err != nil { + t.Fatal(err) + } + + rp, err := c.RetentionPolicy("db0", "rp0") + if err != nil { + t.Fatal(err) + } else if rp.Name != "rp0" { + t.Fatalf("rp name wrong: %s", rp.Name) + } else if rp.Duration != time.Hour { + t.Fatalf("rp duration wrong: %s", rp.Duration.String()) + } else if rp.ReplicaN != 1 { + t.Fatalf("rp replication wrong: %d", rp.ReplicaN) + } + + // Create the same policy. Should not error. + if _, err := c.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{ + Name: "rp0", + Duration: 1 * time.Hour, + ReplicaN: 1, + }); err != nil { + t.Fatal(err) + } + + rp, err = c.RetentionPolicy("db0", "rp0") + if err != nil { + t.Fatal(err) + } else if rp.Name != "rp0" { + t.Fatalf("rp name wrong: %s", rp.Name) + } else if rp.Duration != time.Hour { + t.Fatalf("rp duration wrong: %s", rp.Duration.String()) + } else if rp.ReplicaN != 1 { + t.Fatalf("rp replication wrong: %d", rp.ReplicaN) + } +} + +func TestMetaClient_SetDefaultRetentionPolicy(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabaseWithRetentionPolicy("db0", &meta.RetentionPolicyInfo{ + Name: "rp0", + Duration: 1 * time.Hour, + ReplicaN: 1, + }); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("datbase not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + rp, err := c.RetentionPolicy("db0", "rp0") + if err != nil { + t.Fatal(err) + } else if rp.Name != "rp0" { + t.Fatalf("rp name wrong: %s", rp.Name) + } else if rp.Duration != time.Hour { + t.Fatalf("rp duration wrong: %s", rp.Duration.String()) + } else if rp.ReplicaN != 1 { + t.Fatalf("rp replication wrong: %d", rp.ReplicaN) + } + + // Make sure default retention policy is now rp0 + if exp, got := "rp0", db.DefaultRetentionPolicy; exp != got { + t.Fatalf("rp name wrong: \n\texp: %s\n\tgot: %s", exp, db.DefaultRetentionPolicy) + } +} + +func TestMetaClient_DropRetentionPolicy(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + if _, err := c.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{ + Name: "rp0", + Duration: 1 * time.Hour, + ReplicaN: 1, + }); err != nil { + t.Fatal(err) + } + + rp, err := c.RetentionPolicy("db0", "rp0") + if err != nil { + t.Fatal(err) + } else if rp.Name != "rp0" { + t.Fatalf("rp name wrong: %s", rp.Name) + } else if rp.Duration != time.Hour { + t.Fatalf("rp duration wrong: %s", rp.Duration.String()) + } else if rp.ReplicaN != 1 { + t.Fatalf("rp replication wrong: 
%d", rp.ReplicaN) + } + + if err := c.DropRetentionPolicy("db0", "rp0"); err != nil { + t.Fatal(err) + } + + rp, err = c.RetentionPolicy("db0", "rp0") + if err != nil { + t.Fatal(err) + } else if rp != nil { + t.Fatalf("rp should have been dropped") + } +} + +func TestMetaClient_CreateUser(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + // Create an admin user + if _, err := c.CreateUser("fred", "supersecure", true); err != nil { + t.Fatal(err) + } + + // Create a non-admin user + if _, err := c.CreateUser("wilma", "password", false); err != nil { + t.Fatal(err) + } + + u, err := c.User("fred") + if err != nil { + t.Fatal(err) + } + if exp, got := "fred", u.Name; exp != got { + t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) + } + if !u.Admin { + t.Fatalf("expected user to be admin") + } + + u, err = c.Authenticate("fred", "supersecure") + if u == nil || err != nil || u.Name != "fred" { + t.Fatalf("failed to authenticate") + } + + // Auth for bad password should fail + u, err = c.Authenticate("fred", "badpassword") + if u != nil || err != meta.ErrAuthenticate { + t.Fatalf("authentication should fail with %s", meta.ErrAuthenticate) + } + + // Auth for no password should fail + u, err = c.Authenticate("fred", "") + if u != nil || err != meta.ErrAuthenticate { + t.Fatalf("authentication should fail with %s", meta.ErrAuthenticate) + } + + // Change password should succeed. + if err := c.UpdateUser("fred", "moresupersecure"); err != nil { + t.Fatal(err) + } + + // Auth for old password should fail + u, err = c.Authenticate("fred", "supersecure") + if u != nil || err != meta.ErrAuthenticate { + t.Fatalf("authentication should fail with %s", meta.ErrAuthenticate) + } + + // Auth for new password should succeed. + u, err = c.Authenticate("fred", "moresupersecure") + if u == nil || err != nil || u.Name != "fred" { + t.Fatalf("failed to authenticate") + } + + // Auth for unkonwn user should fail + u, err = c.Authenticate("foo", "") + if u != nil || err != meta.ErrUserNotFound { + t.Fatalf("authentication should fail with %s", meta.ErrUserNotFound) + } + + u, err = c.User("wilma") + if err != nil { + t.Fatal(err) + } + if exp, got := "wilma", u.Name; exp != got { + t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) + } + if u.Admin { + t.Fatalf("expected user not to be an admin") + } + + if exp, got := 2, c.UserCount(); exp != got { + t.Fatalf("unexpected user count. got: %d exp: %d", got, exp) + } + + // Grant privilidges to a non-admin user + if err := c.SetAdminPrivilege("wilma", true); err != nil { + t.Fatal(err) + } + + u, err = c.User("wilma") + if err != nil { + t.Fatal(err) + } + if exp, got := "wilma", u.Name; exp != got { + t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) + } + if !u.Admin { + t.Fatalf("expected user to be an admin") + } + + // Revoke privilidges from user + if err := c.SetAdminPrivilege("wilma", false); err != nil { + t.Fatal(err) + } + + u, err = c.User("wilma") + if err != nil { + t.Fatal(err) + } + if exp, got := "wilma", u.Name; exp != got { + t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) + } + if u.Admin { + t.Fatalf("expected user not to be an admin") + } + + // Create a database to use for assiging privileges to. 
+ if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + // Assign a single privilege at the database level + if err := c.SetPrivilege("wilma", "db0", influxql.ReadPrivilege); err != nil { + t.Fatal(err) + } + + p, err := c.UserPrivilege("wilma", "db0") + if err != nil { + t.Fatal(err) + } + if p == nil { + t.Fatal("expected privilege but was nil") + } + if exp, got := influxql.ReadPrivilege, *p; exp != got { + t.Fatalf("unexpected privilege. exp: %d, got: %d", exp, got) + } + + // Remove a single privilege at the database level + if err := c.SetPrivilege("wilma", "db0", influxql.NoPrivileges); err != nil { + t.Fatal(err) + } + p, err = c.UserPrivilege("wilma", "db0") + if err != nil { + t.Fatal(err) + } + if p == nil { + t.Fatal("expected privilege but was nil") + } + if exp, got := influxql.NoPrivileges, *p; exp != got { + t.Fatalf("unexpected privilege. exp: %d, got: %d", exp, got) + } + + // Drop a user + if err := c.DropUser("wilma"); err != nil { + t.Fatal(err) + } + + u, err = c.User("wilma") + if err != meta.ErrUserNotFound { + t.Fatalf("user lookup should fail with %s", meta.ErrUserNotFound) + } + + if exp, got := 1, c.UserCount(); exp != got { + t.Fatalf("unexpected user count. got: %d exp: %d", got, exp) + } +} + +func TestMetaClient_ContinuousQueries(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + // Create a database to use + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatalf("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + // Create a CQ + if err := c.CreateContinuousQuery("db0", "cq0", `SELECT count(value) INTO foo_count FROM foo GROUP BY time(10m)`); err != nil { + t.Fatal(err) + } + + // Recreate an existing CQ + if err := c.CreateContinuousQuery("db0", "cq0", `SELECT max(value) INTO foo_max FROM foo GROUP BY time(10m)`); err == nil || err.Error() != `continuous query already exists` { + t.Fatalf("unexpected error: %s", err) + } + + // Create a few more CQ's + if err := c.CreateContinuousQuery("db0", "cq1", `SELECT max(value) INTO foo_max FROM foo GROUP BY time(10m)`); err != nil { + t.Fatal(err) + } + if err := c.CreateContinuousQuery("db0", "cq2", `SELECT min(value) INTO foo_min FROM foo GROUP BY time(10m)`); err != nil { + t.Fatal(err) + } + + // Drop a single CQ + if err := c.DropContinuousQuery("db0", "cq1"); err != nil { + t.Fatal(err) + } +} + +func TestMetaClient_Subscriptions_Create(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + // Create a database to use + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + db, err := c.Database("db0") + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("database not found") + } else if db.Name != "db0" { + t.Fatalf("db name wrong: %s", db.Name) + } + + // Create a subscription + if err := c.CreateSubscription("db0", "default", "sub0", "ALL", []string{"udp://example.com:9090"}); err != nil { + t.Fatal(err) + } + + // Re-create a subscription + if err := c.CreateSubscription("db0", "default", "sub0", "ALL", []string{"udp://example.com:9090"}); err == nil || err.Error() != `subscription already exists` { + t.Fatalf("unexpected error: %s", err) + } + + // Create 
another subscription. + if err := c.CreateSubscription("db0", "default", "sub1", "ALL", []string{"udp://example.com:6060"}); err != nil { + t.Fatal(err) + } +} + +func TestMetaClient_Subscriptions_Drop(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + // Create a database to use + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + // DROP SUBSCRIPTION returns ErrSubscriptionNotFound when the + // subscription is unknown. + err := c.DropSubscription("db0", "default", "foo") + if got, exp := err, meta.ErrSubscriptionNotFound; got == nil || got.Error() != exp.Error() { + t.Fatalf("got: %s, exp: %s", got, exp) + } + + // Create a subscription. + if err := c.CreateSubscription("db0", "default", "sub0", "ALL", []string{"udp://example.com:9090"}); err != nil { + t.Fatal(err) + } + + // DROP SUBSCRIPTION returns an influxdb.ErrDatabaseNotFound when + // the database is unknown. + err = c.DropSubscription("foo", "default", "sub0") + if got, exp := err, influxdb.ErrDatabaseNotFound("foo"); got.Error() != exp.Error() { + t.Fatalf("got: %s, exp: %s", got, exp) + } + + // DROP SUBSCRIPTION returns an influxdb.ErrRetentionPolicyNotFound + // when the retention policy is unknown. + err = c.DropSubscription("db0", "foo_policy", "sub0") + if got, exp := err, influxdb.ErrRetentionPolicyNotFound("foo_policy"); got.Error() != exp.Error() { + t.Fatalf("got: %s, exp: %s", got, exp) + } + + // DROP SUBSCRIPTION drops the subsciption if it can find it. + err = c.DropSubscription("db0", "default", "sub0") + if got := err; got != nil { + t.Fatalf("got: %s, exp: %v", got, nil) + } +} + +func TestMetaClient_Shards(t *testing.T) { + t.Parallel() + + d, c := newClient() + defer os.RemoveAll(d) + defer c.Close() + + if _, err := c.CreateDatabase("db0"); err != nil { + t.Fatal(err) + } + + // Test creating a shard group. + tmin := time.Now() + sg, err := c.CreateShardGroup("db0", "default", tmin) + if err != nil { + t.Fatal(err) + } else if sg == nil { + t.Fatalf("expected ShardGroup") + } + + // Test pre-creating shard groups. + dur := sg.EndTime.Sub(sg.StartTime) + time.Nanosecond + tmax := tmin.Add(dur) + if err := c.PrecreateShardGroups(tmin, tmax); err != nil { + t.Fatal(err) + } + + // Test finding shard groups by time range. + groups, err := c.ShardGroupsByTimeRange("db0", "default", tmin, tmax) + if err != nil { + t.Fatal(err) + } else if len(groups) != 2 { + t.Fatalf("wrong number of shard groups: %d", len(groups)) + } + + // Test finding shard owner. + db, rp, owner := c.ShardOwner(groups[0].Shards[0].ID) + if db != "db0" { + t.Fatalf("wrong db name: %s", db) + } else if rp != "default" { + t.Fatalf("wrong rp name: %s", rp) + } else if owner.ID != groups[0].ID { + t.Fatalf("wrong owner: exp %d got %d", groups[0].ID, owner.ID) + } + + // Test deleting a shard group. 
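+	// Deleting the first group should leave only the precreated group in the range.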
+ if err := c.DeleteShardGroup("db0", "default", groups[0].ID); err != nil { + t.Fatal(err) + } else if groups, err = c.ShardGroupsByTimeRange("db0", "default", tmin, tmax); err != nil { + t.Fatal(err) + } else if len(groups) != 1 { + t.Fatalf("wrong number of shard groups after delete: %d", len(groups)) + } +} + +func TestMetaClient_PersistClusterIDAfterRestart(t *testing.T) { + t.Parallel() + + cfg := newConfig() + defer os.RemoveAll(cfg.Dir) + + c := meta.NewClient(cfg) + if err := c.Open(); err != nil { + t.Fatal(err) + } + id := c.ClusterID() + if id == 0 { + t.Fatal("cluster ID can't be zero") + } + + c = meta.NewClient(cfg) + if err := c.Open(); err != nil { + t.Fatal(err) + } + defer c.Close() + + idAfter := c.ClusterID() + if idAfter == 0 { + t.Fatal("cluster ID can't be zero") + } else if idAfter != id { + t.Fatalf("cluster id not the same: %d, %d", idAfter, id) + } +} + +func newClient() (string, *meta.Client) { + cfg := newConfig() + c := meta.NewClient(cfg) + if err := c.Open(); err != nil { + panic(err) + } + return cfg.Dir, c +} + +func newConfig() *meta.Config { + cfg := meta.NewConfig() + cfg.Dir = testTempDir(2) + return cfg +} + +func testTempDir(skip int) string { + // Get name of the calling function. + pc, _, _, ok := runtime.Caller(skip) + if !ok { + panic("failed to get name of test function") + } + _, prefix := path.Split(runtime.FuncForPC(pc).Name()) + // Make a temp dir prefixed with calling function's name. + dir, err := ioutil.TempDir(os.TempDir(), prefix) + if err != nil { + panic(err) + } + return dir +} + +func mustParseStatement(s string) influxql.Statement { + stmt, err := influxql.ParseStatement(s) + if err != nil { + panic(err) + } + return stmt +} + +func mustMarshalJSON(v interface{}) string { + b, e := json.Marshal(v) + if e != nil { + panic(e) + } + return string(b) +} + +func freePort() string { + l, _ := net.Listen("tcp", "127.0.0.1:0") + defer l.Close() + return l.Addr().String() +} + +func freePorts(i int) []string { + var ports []string + for j := 0; j < i; j++ { + ports = append(ports, freePort()) + } + return ports +} diff --git a/services/meta/config.go b/services/meta/config.go index 948b3493d67..5ad7a57cedf 100644 --- a/services/meta/config.go +++ b/services/meta/config.go @@ -2,40 +2,12 @@ package meta import ( "errors" - "net" "time" "github.com/influxdata/influxdb/toml" ) const ( - // DefaultEnabled is the default state for the meta service to run - DefaultEnabled = true - - // DefaultHostname is the default hostname if one is not provided. - DefaultHostname = "localhost" - - // DefaultRaftBindAddress is the default address to bind to. - DefaultRaftBindAddress = ":8088" - - // DefaultHTTPBindAddress is the default address to bind the API to. - DefaultHTTPBindAddress = ":8091" - - // DefaultHeartbeatTimeout is the default heartbeat timeout for the store. - DefaultHeartbeatTimeout = 1000 * time.Millisecond - - // DefaultElectionTimeout is the default election timeout for the store. - DefaultElectionTimeout = 1000 * time.Millisecond - - // DefaultLeaderLeaseTimeout is the default leader lease for the store. - DefaultLeaderLeaseTimeout = 500 * time.Millisecond - - // DefaultCommitTimeout is the default commit timeout for the store. - DefaultCommitTimeout = 50 * time.Millisecond - - // DefaultRaftPromotionEnabled is the default for auto promoting a node to a raft node when needed - DefaultRaftPromotionEnabled = true - // DefaultLeaseDuration is the default duration for leases. 
DefaultLeaseDuration = 60 * time.Second @@ -45,32 +17,11 @@ const ( // Config represents the meta configuration. type Config struct { - Enabled bool `toml:"enabled"` - Dir string `toml:"dir"` - - // RemoteHostname is the hostname portion to use when registering meta node - // addresses. This hostname must be resolvable from other nodes. - RemoteHostname string `toml:"-"` + Dir string `toml:"dir"` - // this is deprecated. Should use the address from run/config.go - BindAddress string `toml:"bind-address"` - - // HTTPBindAddress is the bind address for the metaservice HTTP API - HTTPBindAddress string `toml:"http-bind-address"` - HTTPSEnabled bool `toml:"https-enabled"` - HTTPSCertificate string `toml:"https-certificate"` - - // JoinPeers if specified gives other metastore servers to join this server to the cluster - JoinPeers []string `toml:"-"` - RetentionAutoCreate bool `toml:"retention-autocreate"` - ElectionTimeout toml.Duration `toml:"election-timeout"` - HeartbeatTimeout toml.Duration `toml:"heartbeat-timeout"` - LeaderLeaseTimeout toml.Duration `toml:"leader-lease-timeout"` - CommitTimeout toml.Duration `toml:"commit-timeout"` - ClusterTracing bool `toml:"cluster-tracing"` - RaftPromotionEnabled bool `toml:"raft-promotion-enabled"` - LoggingEnabled bool `toml:"logging-enabled"` - PprofEnabled bool `toml:"pprof-enabled"` + RetentionAutoCreate bool `toml:"retention-autocreate"` + LoggingEnabled bool `toml:"logging-enabled"` + PprofEnabled bool `toml:"pprof-enabled"` LeaseDuration toml.Duration `toml:"lease-duration"` } @@ -78,18 +29,9 @@ type Config struct { // NewConfig builds a new configuration with default values. func NewConfig() *Config { return &Config{ - Enabled: true, // enabled by default - BindAddress: DefaultRaftBindAddress, - HTTPBindAddress: DefaultHTTPBindAddress, - RetentionAutoCreate: true, - ElectionTimeout: toml.Duration(DefaultElectionTimeout), - HeartbeatTimeout: toml.Duration(DefaultHeartbeatTimeout), - LeaderLeaseTimeout: toml.Duration(DefaultLeaderLeaseTimeout), - CommitTimeout: toml.Duration(DefaultCommitTimeout), - RaftPromotionEnabled: DefaultRaftPromotionEnabled, - LeaseDuration: toml.Duration(DefaultLeaseDuration), - LoggingEnabled: DefaultLoggingEnabled, - JoinPeers: []string{}, + RetentionAutoCreate: true, + LeaseDuration: toml.Duration(DefaultLeaseDuration), + LoggingEnabled: DefaultLoggingEnabled, } } @@ -99,23 +41,3 @@ func (c *Config) Validate() error { } return nil } - -func (c *Config) defaultHost(addr string) string { - address, err := DefaultHost(DefaultHostname, addr) - if nil != err { - return addr - } - return address -} - -func DefaultHost(hostname, addr string) (string, error) { - host, port, err := net.SplitHostPort(addr) - if err != nil { - return "", err - } - - if host == "" || host == "0.0.0.0" || host == "::" { - return net.JoinHostPort(hostname, port), nil - } - return addr, nil -} diff --git a/services/meta/config_test.go b/services/meta/config_test.go index 8861324d358..5a6b46121a5 100644 --- a/services/meta/config_test.go +++ b/services/meta/config_test.go @@ -2,7 +2,6 @@ package meta_test import ( "testing" - "time" "github.com/BurntSushi/toml" "github.com/influxdata/influxdb/services/meta" @@ -12,33 +11,15 @@ func TestConfig_Parse(t *testing.T) { // Parse configuration. 
var c meta.Config if _, err := toml.Decode(` -enabled = false dir = "/tmp/foo" -election-timeout = "10s" -heartbeat-timeout = "20s" -leader-lease-timeout = "30h" -commit-timeout = "40m" -raft-promotion-enabled = false logging-enabled = false `, &c); err != nil { t.Fatal(err) } // Validate configuration. - if c.Enabled == true { - t.Fatalf("unexpected enabled: %v", c.Enabled) - } else if c.Dir != "/tmp/foo" { + if c.Dir != "/tmp/foo" { t.Fatalf("unexpected dir: %s", c.Dir) - } else if time.Duration(c.ElectionTimeout) != 10*time.Second { - t.Fatalf("unexpected election timeout: %v", c.ElectionTimeout) - } else if time.Duration(c.HeartbeatTimeout) != 20*time.Second { - t.Fatalf("unexpected heartbeat timeout: %v", c.HeartbeatTimeout) - } else if time.Duration(c.LeaderLeaseTimeout) != 30*time.Hour { - t.Fatalf("unexpected leader lease timeout: %v", c.LeaderLeaseTimeout) - } else if time.Duration(c.CommitTimeout) != 40*time.Minute { - t.Fatalf("unexpected commit timeout: %v", c.CommitTimeout) - } else if c.RaftPromotionEnabled { - t.Fatalf("unexpected raft promotion enabled: %v", c.RaftPromotionEnabled) } else if c.LoggingEnabled { t.Fatalf("unexpected logging enabled: %v", c.LoggingEnabled) } diff --git a/services/meta/data.go b/services/meta/data.go index 6a6c57e7b28..fcce424b275 100644 --- a/services/meta/data.go +++ b/services/meta/data.go @@ -31,172 +31,13 @@ type Data struct { Term uint64 // associated raft term Index uint64 // associated raft index ClusterID uint64 - MetaNodes []NodeInfo - DataNodes []NodeInfo Databases []DatabaseInfo Users []UserInfo - MaxNodeID uint64 MaxShardGroupID uint64 MaxShardID uint64 } -// DataNode returns a node by id. -func (data *Data) DataNode(id uint64) *NodeInfo { - for i := range data.DataNodes { - if data.DataNodes[i].ID == id { - return &data.DataNodes[i] - } - } - return nil -} - -// CreateDataNode adds a node to the metadata. -func (data *Data) CreateDataNode(host, tcpHost string) error { - // Ensure a node with the same host doesn't already exist. - for _, n := range data.DataNodes { - if n.TCPHost == tcpHost { - return ErrNodeExists - } - } - - // If an existing meta node exists with the same TCPHost address, - // then these nodes are actually the same so re-use the existing ID - var existingID uint64 - for _, n := range data.MetaNodes { - if n.TCPHost == tcpHost { - existingID = n.ID - break - } - } - - // We didn't find an existing node, so assign it a new node ID - if existingID == 0 { - data.MaxNodeID++ - existingID = data.MaxNodeID - } - - // Append new node. - data.DataNodes = append(data.DataNodes, NodeInfo{ - ID: existingID, - Host: host, - TCPHost: tcpHost, - }) - sort.Sort(NodeInfos(data.DataNodes)) - - return nil -} - -// SetDataNode adds a data node with a pre-specified nodeID. -// this should only be used when the cluster is upgrading from 0.9 to 0.10 -func (data *Data) SetDataNode(nodeID uint64, host, tcpHost string) error { - // Ensure a node with the same host doesn't already exist. - for _, n := range data.DataNodes { - if n.Host == host { - return ErrNodeExists - } - } - - // Append new node. - data.DataNodes = append(data.DataNodes, NodeInfo{ - ID: nodeID, - Host: host, - TCPHost: tcpHost, - }) - - return nil -} - -// DeleteDataNode removes a node from the Meta store. -// -// If necessary, DeleteDataNode reassigns ownership of any shards that -// would otherwise become orphaned by the removal of the node from the -// cluster. 
-func (data *Data) DeleteDataNode(id uint64) error { - var nodes []NodeInfo - - // Remove the data node from the store's list. - for _, n := range data.DataNodes { - if n.ID != id { - nodes = append(nodes, n) - } - } - - if len(nodes) == len(data.DataNodes) { - return ErrNodeNotFound - } - data.DataNodes = nodes - - // Remove node id from all shard infos - for di, d := range data.Databases { - for ri, rp := range d.RetentionPolicies { - for sgi, sg := range rp.ShardGroups { - var ( - nodeOwnerFreqs = make(map[int]int) - orphanedShards []ShardInfo - ) - // Look through all shards in the shard group and - // determine (1) if a shard no longer has any owners - // (orphaned); (2) if all shards in the shard group - // are orphaned; and (3) the number of shards in this - // group owned by each data node in the cluster. - for si, s := range sg.Shards { - // Track of how many shards in the group are - // owned by each data node in the cluster. - var nodeIdx = -1 - for i, owner := range s.Owners { - if owner.NodeID == id { - nodeIdx = i - } - nodeOwnerFreqs[int(owner.NodeID)]++ - } - - if nodeIdx > -1 { - // Data node owns shard, so relinquish ownership - // and set new owners on the shard. - s.Owners = append(s.Owners[:nodeIdx], s.Owners[nodeIdx+1:]...) - data.Databases[di].RetentionPolicies[ri].ShardGroups[sgi].Shards[si].Owners = s.Owners - } - - // Shard no longer owned. Will need reassigning - // an owner. - if len(s.Owners) == 0 { - orphanedShards = append(orphanedShards, s) - } - } - - // Mark the shard group as deleted if it has no shards, - // or all of its shards are orphaned. - if len(sg.Shards) == 0 || len(orphanedShards) == len(sg.Shards) { - data.Databases[di].RetentionPolicies[ri].ShardGroups[sgi].DeletedAt = time.Now().UTC() - continue - } - - // Reassign any orphaned shards. Delete the node we're - // dropping from the list of potential new owners. - delete(nodeOwnerFreqs, int(id)) - - for _, orphan := range orphanedShards { - newOwnerID, err := newShardOwner(orphan, nodeOwnerFreqs) - if err != nil { - return err - } - - for si, s := range sg.Shards { - if s.ID == orphan.ID { - sg.Shards[si].Owners = append(sg.Shards[si].Owners, ShardOwner{NodeID: newOwnerID}) - data.Databases[di].RetentionPolicies[ri].ShardGroups[sgi].Shards = sg.Shards - break - } - } - - } - } - } - } - return nil -} - // newShardOwner sets the owner of the provided shard to the data node // that currently owns the fewest number of shards. If multiple nodes // own the same (fewest) number of shards, then one of those nodes @@ -223,94 +64,6 @@ func newShardOwner(s ShardInfo, ownerFreqs map[int]int) (uint64, error) { return uint64(minId), nil } -// MetaNode returns a node by id. -func (data *Data) MetaNode(id uint64) *NodeInfo { - for i := range data.MetaNodes { - if data.MetaNodes[i].ID == id { - return &data.MetaNodes[i] - } - } - return nil -} - -// CreateMetaNode will add a new meta node to the metastore -func (data *Data) CreateMetaNode(httpAddr, tcpAddr string) error { - // Ensure a node with the same host doesn't already exist. - for _, n := range data.MetaNodes { - if n.Host == httpAddr { - return ErrNodeExists - } - } - - // If an existing data node exists with the same TCPHost address, - // then these nodes are actually the same so re-use the existing ID - var existingID uint64 - for _, n := range data.DataNodes { - if n.TCPHost == tcpAddr { - existingID = n.ID - break - } - } - - // We didn't find and existing data node ID, so assign a new ID - // to this meta node. 
- if existingID == 0 { - data.MaxNodeID++ - existingID = data.MaxNodeID - } - - // Append new node. - data.MetaNodes = append(data.MetaNodes, NodeInfo{ - ID: existingID, - Host: httpAddr, - TCPHost: tcpAddr, - }) - - sort.Sort(NodeInfos(data.MetaNodes)) - return nil -} - -// SetMetaNode will update the information for the single meta -// node or create a new metanode. If there are more than 1 meta -// nodes already, an error will be returned -func (data *Data) SetMetaNode(httpAddr, tcpAddr string) error { - if len(data.MetaNodes) > 1 { - return fmt.Errorf("can't set meta node when there are more than 1 in the metastore") - } - - if len(data.MetaNodes) == 0 { - return data.CreateMetaNode(httpAddr, tcpAddr) - } - - data.MetaNodes[0].Host = httpAddr - data.MetaNodes[0].TCPHost = tcpAddr - - return nil -} - -// DeleteMetaNode will remove the meta node from the store -func (data *Data) DeleteMetaNode(id uint64) error { - // Node has to be larger than 0 to be real - if id == 0 { - return ErrNodeIDRequired - } - - var nodes []NodeInfo - for _, n := range data.MetaNodes { - if n.ID == id { - continue - } - nodes = append(nodes, n) - } - - if len(nodes) == len(data.MetaNodes) { - return ErrNodeNotFound - } - - data.MetaNodes = nodes - return nil -} - // Database returns a database by name. func (data *Data) Database(name string) *DatabaseInfo { for i := range data.Databases { @@ -553,11 +306,6 @@ func (data *Data) ShardGroupByTimestamp(database, policy string, timestamp time. // CreateShardGroup creates a shard group on a database and policy for a given timestamp. func (data *Data) CreateShardGroup(database, policy string, timestamp time.Time) error { - // Ensure there are nodes in the metadata. - if len(data.DataNodes) == 0 { - return nil - } - // Find retention policy. rpi, err := data.RetentionPolicy(database, policy) if err != nil { @@ -571,19 +319,6 @@ func (data *Data) CreateShardGroup(database, policy string, timestamp time.Time) return nil } - // Require at least one replica but no more replicas than nodes. - replicaN := rpi.ReplicaN - if replicaN == 0 { - replicaN = 1 - } else if replicaN > len(data.DataNodes) { - replicaN = len(data.DataNodes) - } - - // Determine shard count by node count divided by replication factor. - // This will ensure nodes will get distributed across nodes evenly and - // replicated the correct number of times. - shardN := len(data.DataNodes) / replicaN - // Create the shard group. data.MaxShardGroupID++ sgi := ShardGroupInfo{} @@ -591,23 +326,9 @@ func (data *Data) CreateShardGroup(database, policy string, timestamp time.Time) sgi.StartTime = timestamp.Truncate(rpi.ShardGroupDuration).UTC() sgi.EndTime = sgi.StartTime.Add(rpi.ShardGroupDuration).UTC() - // Create shards on the group. - sgi.Shards = make([]ShardInfo, shardN) - for i := range sgi.Shards { - data.MaxShardID++ - sgi.Shards[i] = ShardInfo{ID: data.MaxShardID} - } - - // Assign data nodes to shards via round robin. - // Start from a repeatably "random" place in the node list. - nodeIndex := int(data.Index % uint64(len(data.DataNodes))) - for i := range sgi.Shards { - si := &sgi.Shards[i] - for j := 0; j < replicaN; j++ { - nodeID := data.DataNodes[nodeIndex%len(data.DataNodes)].ID - si.Owners = append(si.Owners, ShardOwner{NodeID: nodeID}) - nodeIndex++ - } + data.MaxShardID++ + sgi.Shards = []ShardInfo{ + {ID: data.MaxShardID}, } // Retention policy has a new shard group, so update the policy. 
Shard @@ -831,21 +552,6 @@ func (data *Data) UserPrivilege(name, database string) (*influxql.Privilege, err func (data *Data) Clone() *Data { other := *data - // Copy nodes. - if data.DataNodes != nil { - other.DataNodes = make([]NodeInfo, len(data.DataNodes)) - for i := range data.DataNodes { - other.DataNodes[i] = data.DataNodes[i].clone() - } - } - - if data.MetaNodes != nil { - other.MetaNodes = make([]NodeInfo, len(data.MetaNodes)) - for i := range data.MetaNodes { - other.MetaNodes[i] = data.MetaNodes[i].clone() - } - } - // Deep copy databases. if data.Databases != nil { other.Databases = make([]DatabaseInfo, len(data.Databases)) @@ -872,19 +578,11 @@ func (data *Data) marshal() *internal.Data { Index: proto.Uint64(data.Index), ClusterID: proto.Uint64(data.ClusterID), - MaxNodeID: proto.Uint64(data.MaxNodeID), MaxShardGroupID: proto.Uint64(data.MaxShardGroupID), MaxShardID: proto.Uint64(data.MaxShardID), - } - - pb.DataNodes = make([]*internal.NodeInfo, len(data.DataNodes)) - for i := range data.DataNodes { - pb.DataNodes[i] = data.DataNodes[i].marshal() - } - pb.MetaNodes = make([]*internal.NodeInfo, len(data.MetaNodes)) - for i := range data.MetaNodes { - pb.MetaNodes[i] = data.MetaNodes[i].marshal() + // Need this for reverse compatibility + MaxNodeID: proto.Uint64(0), } pb.Databases = make([]*internal.DatabaseInfo, len(data.Databases)) @@ -906,28 +604,9 @@ func (data *Data) unmarshal(pb *internal.Data) { data.Index = pb.GetIndex() data.ClusterID = pb.GetClusterID() - data.MaxNodeID = pb.GetMaxNodeID() data.MaxShardGroupID = pb.GetMaxShardGroupID() data.MaxShardID = pb.GetMaxShardID() - // TODO: Nodes is deprecated. This is being left here to make migration from 0.9.x to 0.10.0 possible - if len(pb.GetNodes()) > 0 { - data.DataNodes = make([]NodeInfo, len(pb.GetNodes())) - for i, x := range pb.GetNodes() { - data.DataNodes[i].unmarshal(x) - } - } else { - data.DataNodes = make([]NodeInfo, len(pb.GetDataNodes())) - for i, x := range pb.GetDataNodes() { - data.DataNodes[i].unmarshal(x) - } - } - - data.MetaNodes = make([]NodeInfo, len(pb.GetMetaNodes())) - for i, x := range pb.GetMetaNodes() { - data.MetaNodes[i].unmarshal(x) - } - data.Databases = make([]DatabaseInfo, len(pb.GetDatabases())) for i, x := range pb.GetDatabases() { data.Databases[i].unmarshal(x) diff --git a/services/meta/handler.go b/services/meta/handler.go deleted file mode 100644 index 82d10f47c61..00000000000 --- a/services/meta/handler.go +++ /dev/null @@ -1,481 +0,0 @@ -package meta - -import ( - "compress/gzip" - "encoding/json" - "errors" - "fmt" - "io" - "io/ioutil" - "log" - "net/http" - "os" - "runtime" - "strconv" - "strings" - "sync" - "time" - - "github.com/gogo/protobuf/proto" - "github.com/hashicorp/raft" - "github.com/influxdata/influxdb/services/meta/internal" - "github.com/influxdata/influxdb/uuid" -) - -// handler represents an HTTP handler for the meta service. -type handler struct { - config *Config - - logger *log.Logger - loggingEnabled bool // Log every HTTP access. - pprofEnabled bool - store interface { - afterIndex(index uint64) <-chan struct{} - index() uint64 - leader() string - leaderHTTP() string - snapshot() (*Data, error) - apply(b []byte) error - join(n *NodeInfo) (*NodeInfo, error) - otherMetaServersHTTP() []string - peers() []string - } - s *Service - - mu sync.RWMutex - closing chan struct{} - leases *Leases -} - -// newHandler returns a new instance of handler with routes. 
-func newHandler(c *Config, s *Service) *handler { - h := &handler{ - s: s, - config: c, - logger: log.New(os.Stderr, "[meta-http] ", log.LstdFlags), - loggingEnabled: c.ClusterTracing, - closing: make(chan struct{}), - leases: NewLeases(time.Duration(c.LeaseDuration)), - } - - return h -} - -// SetRoutes sets the provided routes on the handler. -func (h *handler) WrapHandler(name string, hf http.HandlerFunc) http.Handler { - var handler http.Handler - handler = http.HandlerFunc(hf) - handler = gzipFilter(handler) - handler = versionHeader(handler, h) - handler = requestID(handler) - if h.loggingEnabled { - handler = logging(handler, name, h.logger) - } - handler = recovery(handler, name, h.logger) // make sure recovery is always last - - return handler -} - -// ServeHTTP responds to HTTP request to the handler. -func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - switch r.Method { - case "GET": - switch r.URL.Path { - case "/ping": - h.WrapHandler("ping", h.servePing).ServeHTTP(w, r) - case "/lease": - h.WrapHandler("lease", h.serveLease).ServeHTTP(w, r) - case "/peers": - h.WrapHandler("peers", h.servePeers).ServeHTTP(w, r) - default: - h.WrapHandler("snapshot", h.serveSnapshot).ServeHTTP(w, r) - } - case "POST": - h.WrapHandler("execute", h.serveExec).ServeHTTP(w, r) - default: - http.Error(w, "", http.StatusBadRequest) - } -} - -func (h *handler) Close() error { - h.mu.Lock() - defer h.mu.Unlock() - select { - case <-h.closing: - // do nothing here - default: - close(h.closing) - } - return nil -} - -func (h *handler) isClosed() bool { - h.mu.RLock() - defer h.mu.RUnlock() - select { - case <-h.closing: - return true - default: - return false - } -} - -// serveExec executes the requested command. -func (h *handler) serveExec(w http.ResponseWriter, r *http.Request) { - if h.isClosed() { - h.httpError(fmt.Errorf("server closed"), w, http.StatusServiceUnavailable) - return - } - - // Read the command from the request body. - body, err := ioutil.ReadAll(r.Body) - if err != nil { - h.httpError(err, w, http.StatusInternalServerError) - return - } - - if r.URL.Path == "/join" { - n := &NodeInfo{} - if err := json.Unmarshal(body, n); err != nil { - h.httpError(err, w, http.StatusInternalServerError) - return - } - - node, err := h.store.join(n) - if err == raft.ErrNotLeader { - l := h.store.leaderHTTP() - if l == "" { - // No cluster leader. Client will have to try again later. - h.httpError(errors.New("no leader"), w, http.StatusServiceUnavailable) - return - } - scheme := "http://" - if h.config.HTTPSEnabled { - scheme = "https://" - } - - l = scheme + l + "/join" - http.Redirect(w, r, l, http.StatusTemporaryRedirect) - return - } - - if err != nil { - h.httpError(err, w, http.StatusInternalServerError) - return - } - - // Return the node with newly assigned ID as json - w.Header().Add("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(node); err != nil { - h.httpError(err, w, http.StatusInternalServerError) - } - - return - } - - // Make sure it's a valid command. - if err := validateCommand(body); err != nil { - h.httpError(err, w, http.StatusBadRequest) - return - } - - // Apply the command to the store. - var resp *internal.Response - if err := h.store.apply(body); err != nil { - // If we aren't the leader, redirect client to the leader. - if err == raft.ErrNotLeader { - l := h.store.leaderHTTP() - if l == "" { - // No cluster leader. Client will have to try again later. 
- h.httpError(errors.New("no leader"), w, http.StatusServiceUnavailable) - return - } - scheme := "http://" - if h.config.HTTPSEnabled { - scheme = "https://" - } - - l = scheme + l + "/execute" - http.Redirect(w, r, l, http.StatusTemporaryRedirect) - return - } - - // Error wasn't a leadership error so pass it back to client. - resp = &internal.Response{ - OK: proto.Bool(false), - Error: proto.String(err.Error()), - } - } else { - // Apply was successful. Return the new store index to the client. - resp = &internal.Response{ - OK: proto.Bool(false), - Index: proto.Uint64(h.store.index()), - } - } - - // Marshal the response. - b, err := proto.Marshal(resp) - if err != nil { - h.httpError(err, w, http.StatusInternalServerError) - return - } - - // Send response to client. - w.Header().Add("Content-Type", "application/octet-stream") - w.Write(b) -} - -func validateCommand(b []byte) error { - // Ensure command can be deserialized before applying. - if err := proto.Unmarshal(b, &internal.Command{}); err != nil { - return fmt.Errorf("unable to unmarshal command: %s", err) - } - - return nil -} - -// serveSnapshot is a long polling http connection to server cache updates -func (h *handler) serveSnapshot(w http.ResponseWriter, r *http.Request) { - if h.isClosed() { - h.httpError(fmt.Errorf("server closed"), w, http.StatusInternalServerError) - return - } - - // get the current index that client has - index, err := strconv.ParseUint(r.URL.Query().Get("index"), 10, 64) - if err != nil { - http.Error(w, "error parsing index", http.StatusBadRequest) - } - - select { - case <-h.store.afterIndex(index): - // Send updated snapshot to client. - ss, err := h.store.snapshot() - if err != nil { - h.httpError(err, w, http.StatusInternalServerError) - return - } - b, err := ss.MarshalBinary() - if err != nil { - h.httpError(err, w, http.StatusInternalServerError) - return - } - w.Header().Add("Content-Type", "application/octet-stream") - w.Write(b) - return - case <-w.(http.CloseNotifier).CloseNotify(): - // Client closed the connection so we're done. 
- return - case <-h.closing: - h.httpError(fmt.Errorf("server closed"), w, http.StatusInternalServerError) - return - } -} - -// servePing will return if the server is up, or if specified will check the status -// of the other metaservers as well -func (h *handler) servePing(w http.ResponseWriter, r *http.Request) { - // if they're not asking to check all servers, just return who we think - // the leader is - if r.URL.Query().Get("all") == "" { - w.Write([]byte(h.store.leader())) - return - } - - leader := h.store.leader() - healthy := true - for _, n := range h.store.otherMetaServersHTTP() { - scheme := "http://" - if h.config.HTTPSEnabled { - scheme = "https://" - } - url := scheme + n + "/ping" - - resp, err := http.Get(url) - if err != nil { - healthy = false - break - } - - defer resp.Body.Close() - b, err := ioutil.ReadAll(resp.Body) - if err != nil { - healthy = false - break - } - - if leader != string(b) { - healthy = false - break - } - } - - if healthy { - w.Write([]byte(h.store.leader())) - return - } - - h.httpError(fmt.Errorf("one or more metaservers not up"), w, http.StatusInternalServerError) -} - -func (h *handler) servePeers(w http.ResponseWriter, r *http.Request) { - w.Header().Add("Content-Type", "application/json") - enc := json.NewEncoder(w) - if err := enc.Encode(h.store.peers()); err != nil { - h.httpError(err, w, http.StatusInternalServerError) - } -} - -// serveLease -func (h *handler) serveLease(w http.ResponseWriter, r *http.Request) { - var name, nodeIDStr string - q := r.URL.Query() - - // Get the requested lease name. - name = q.Get("name") - if name == "" { - http.Error(w, "lease name required", http.StatusBadRequest) - return - } - - // Get the ID of the requesting node. - nodeIDStr = q.Get("nodeid") - if nodeIDStr == "" { - http.Error(w, "node ID required", http.StatusBadRequest) - return - } - - // Redirect to leader if necessary. - leader := h.store.leaderHTTP() - if leader != h.s.httpAddr { - if leader == "" { - // No cluster leader. Client will have to try again later. - h.httpError(errors.New("no leader"), w, http.StatusServiceUnavailable) - return - } - scheme := "http://" - if h.config.HTTPSEnabled { - scheme = "https://" - } - - leader = scheme + leader + "/lease?" + q.Encode() - http.Redirect(w, r, leader, http.StatusTemporaryRedirect) - return - } - - // Convert node ID to an int. - nodeID, err := strconv.ParseUint(nodeIDStr, 10, 64) - if err != nil { - http.Error(w, "invalid node ID", http.StatusBadRequest) - return - } - - // Try to acquire the requested lease. - // Always returns a lease. err determins if we own it. - l, err := h.leases.Acquire(name, nodeID) - // Marshal the lease to JSON. - b, e := json.Marshal(l) - if e != nil { - h.httpError(e, w, http.StatusInternalServerError) - return - } - // Write HTTP status. - if err != nil { - // Another node owns the lease. - w.WriteHeader(http.StatusConflict) - } else { - // Lease successfully acquired. - w.WriteHeader(http.StatusOK) - } - // Write the lease data. 
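serveLease above always returns a JSON lease body and signals ownership through the status code: 200 when the caller now owns the lease, 409 when another node does. A sketch of a client that consumes that contract; acquireLease and the server address are assumptions, and redirects to the leader are handled automatically by http.Get.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"time"
)

// Lease mirrors the JSON shape written by serveLease.
type Lease struct {
	Name       string    `json:"name"`
	Expiration time.Time `json:"expiration"`
	Owner      uint64    `json:"owner"`
}

// acquireLease asks the meta server for a named lease on behalf of nodeID.
// 200 means this node now owns the lease; 409 means another node does.
func acquireLease(server, name string, nodeID uint64) (*Lease, bool, error) {
	v := url.Values{}
	v.Set("name", name)
	v.Set("nodeid", fmt.Sprint(nodeID))

	resp, err := http.Get(fmt.Sprintf("http://%s/lease?%s", server, v.Encode()))
	if err != nil {
		return nil, false, err
	}
	defer resp.Body.Close()

	var l Lease
	if err := json.NewDecoder(resp.Body).Decode(&l); err != nil {
		return nil, false, err
	}
	return &l, resp.StatusCode == http.StatusOK, nil
}

func main() {
	l, owned, err := acquireLease("127.0.0.1:8091", "continuous_querier", 1)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	fmt.Printf("lease %q owned by node %d until %s (we own it: %v)\n",
		l.Name, l.Owner, l.Expiration, owned)
}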
- w.Header().Add("Content-Type", "application/json") - w.Write(b) - return -} - -type gzipResponseWriter struct { - io.Writer - http.ResponseWriter -} - -func (w gzipResponseWriter) Write(b []byte) (int, error) { - return w.Writer.Write(b) -} - -func (w gzipResponseWriter) Flush() { - w.Writer.(*gzip.Writer).Flush() -} - -func (w gzipResponseWriter) CloseNotify() <-chan bool { - return w.ResponseWriter.(http.CloseNotifier).CloseNotify() -} - -// determines if the client can accept compressed responses, and encodes accordingly -func gzipFilter(inner http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if !strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { - inner.ServeHTTP(w, r) - return - } - w.Header().Set("Content-Encoding", "gzip") - gz := gzip.NewWriter(w) - defer gz.Close() - gzw := gzipResponseWriter{Writer: gz, ResponseWriter: w} - inner.ServeHTTP(gzw, r) - }) -} - -// versionHeader takes a HTTP handler and returns a HTTP handler -// and adds the X-INFLUXBD-VERSION header to outgoing responses. -func versionHeader(inner http.Handler, h *handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Add("X-InfluxDB-Version", h.s.Version) - inner.ServeHTTP(w, r) - }) -} - -func requestID(inner http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - uid := uuid.TimeUUID() - r.Header.Set("Request-Id", uid.String()) - w.Header().Set("Request-Id", r.Header.Get("Request-Id")) - - inner.ServeHTTP(w, r) - }) -} - -func logging(inner http.Handler, name string, weblog *log.Logger) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - start := time.Now() - l := &responseLogger{w: w} - inner.ServeHTTP(l, r) - logLine := buildLogLine(l, r, start) - weblog.Println(logLine) - }) -} - -func recovery(inner http.Handler, name string, weblog *log.Logger) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - start := time.Now() - l := &responseLogger{w: w} - - defer func() { - if err := recover(); err != nil { - b := make([]byte, 1024) - runtime.Stack(b, false) - logLine := buildLogLine(l, r, start) - logLine = fmt.Sprintf("%s [panic:%s]\n%s", logLine, err, string(b)) - weblog.Println(logLine) - } - }() - - inner.ServeHTTP(l, r) - }) -} - -func (h *handler) httpError(err error, w http.ResponseWriter, status int) { - if h.loggingEnabled { - h.logger.Println(err) - } - http.Error(w, "", status) -} diff --git a/services/meta/raft_state.go b/services/meta/raft_state.go deleted file mode 100644 index cb24d599f85..00000000000 --- a/services/meta/raft_state.go +++ /dev/null @@ -1,352 +0,0 @@ -package meta - -import ( - "fmt" - "io/ioutil" - "log" - "net" - "os" - "path/filepath" - "sync" - "time" - - "github.com/hashicorp/raft" - "github.com/hashicorp/raft-boltdb" -) - -// Raft configuration. -const ( - raftLogCacheSize = 512 - raftSnapshotsRetained = 2 - raftTransportMaxPool = 3 - raftTransportTimeout = 10 * time.Second -) - -// raftState is a consensus strategy that uses a local raft implementation for -// consensus operations. 
-type raftState struct { - wg sync.WaitGroup - config *Config - closing chan struct{} - raft *raft.Raft - transport *raft.NetworkTransport - peerStore raft.PeerStore - raftStore *raftboltdb.BoltStore - raftLayer *raftLayer - ln net.Listener - addr string - logger *log.Logger - path string -} - -func newRaftState(c *Config, addr string) *raftState { - return &raftState{ - config: c, - addr: addr, - } -} - -func (r *raftState) open(s *store, ln net.Listener, initializePeers []string) error { - r.ln = ln - r.closing = make(chan struct{}) - - // Setup raft configuration. - config := raft.DefaultConfig() - config.LogOutput = ioutil.Discard - - if r.config.ClusterTracing { - config.Logger = r.logger - } - config.HeartbeatTimeout = time.Duration(r.config.HeartbeatTimeout) - config.ElectionTimeout = time.Duration(r.config.ElectionTimeout) - config.LeaderLeaseTimeout = time.Duration(r.config.LeaderLeaseTimeout) - config.CommitTimeout = time.Duration(r.config.CommitTimeout) - // Since we actually never call `removePeer` this is safe. - // If in the future we decide to call remove peer we have to re-evaluate how to handle this - config.ShutdownOnRemove = false - - // Build raft layer to multiplex listener. - r.raftLayer = newRaftLayer(r.addr, r.ln) - - // Create a transport layer - r.transport = raft.NewNetworkTransport(r.raftLayer, 3, 10*time.Second, config.LogOutput) - - // Create peer storage. - r.peerStore = &peerStore{} - - // This server is joining the raft cluster for the first time if initializePeers are passed in - if len(initializePeers) > 0 { - if err := r.peerStore.SetPeers(initializePeers); err != nil { - return err - } - } - - peers, err := r.peerStore.Peers() - if err != nil { - return err - } - - // If no peers are set in the config or there is one and we are it, then start as a single server. - if len(initializePeers) <= 1 { - config.EnableSingleNode = true - - // Ensure we can always become the leader - config.DisableBootstrapAfterElect = false - - // Make sure our peer address is here. This happens with either a single node cluster - // or a node joining the cluster, as no one else has that information yet. - if !raft.PeerContained(peers, r.addr) { - if err := r.peerStore.SetPeers([]string{r.addr}); err != nil { - return err - } - } - - peers = []string{r.addr} - } - - // Create the log store and stable store. - store, err := raftboltdb.NewBoltStore(filepath.Join(r.path, "raft.db")) - if err != nil { - return fmt.Errorf("new bolt store: %s", err) - } - r.raftStore = store - - // Create the snapshot store. - snapshots, err := raft.NewFileSnapshotStore(r.path, raftSnapshotsRetained, os.Stderr) - if err != nil { - return fmt.Errorf("file snapshot store: %s", err) - } - - // Create raft log. - ra, err := raft.NewRaft(config, (*storeFSM)(s), store, store, snapshots, r.peerStore, r.transport) - if err != nil { - return fmt.Errorf("new raft: %s", err) - } - r.raft = ra - - r.wg.Add(1) - go r.logLeaderChanges() - - return nil -} - -func (r *raftState) logLeaderChanges() { - defer r.wg.Done() - // Logs our current state (Node at 1.2.3.4:8088 [Follower]) - r.logger.Printf(r.raft.String()) - for { - select { - case <-r.closing: - return - case <-r.raft.LeaderCh(): - peers, err := r.peers() - if err != nil { - r.logger.Printf("failed to lookup peers: %v", err) - } - r.logger.Printf("%v. 
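The bootstrap branch in raftState.open above reduces to one decision: if more than one peer was handed in, join that cluster; otherwise run as a single node whose only known peer is its own address. A dependency-free condensation of just that decision, with no raft types involved; bootstrapPeers and the sample addresses are illustrative.

package main

import "fmt"

// bootstrapPeers condenses the peer-seeding decision from raftState.open:
// joining an existing cluster uses the supplied peer list, while a fresh or
// single-node cluster starts with only its own address.
func bootstrapPeers(initializePeers []string, selfAddr string) (peers []string, singleNode bool) {
	if len(initializePeers) > 1 {
		// Joining an existing cluster: the caller supplied the full peer list.
		return initializePeers, false
	}
	// Single-node mode: nobody else knows our address yet, so it is the only peer.
	return []string{selfAddr}, true
}

func main() {
	p, single := bootstrapPeers(nil, "127.0.0.1:8088")
	fmt.Println("peers:", p, "single-node:", single)

	p, single = bootstrapPeers([]string{"10.0.0.1:8088", "10.0.0.2:8088", "10.0.0.3:8088"}, "10.0.0.1:8088")
	fmt.Println("peers:", p, "single-node:", single)
}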
peers=%v", r.raft.String(), peers) - } - } -} - -func (r *raftState) close() error { - if r == nil { - return nil - } - if r.closing != nil { - close(r.closing) - } - r.wg.Wait() - - if r.transport != nil { - r.transport.Close() - r.transport = nil - } - - // Shutdown raft. - if r.raft != nil { - if err := r.raft.Shutdown().Error(); err != nil { - return err - } - r.raft = nil - } - - if r.raftStore != nil { - r.raftStore.Close() - r.raftStore = nil - } - - return nil -} - -// apply applies a serialized command to the raft log. -func (r *raftState) apply(b []byte) error { - // Apply to raft log. - f := r.raft.Apply(b, 0) - if err := f.Error(); err != nil { - return err - } - - // Return response if it's an error. - // No other non-nil objects should be returned. - resp := f.Response() - if err, ok := resp.(error); ok { - return err - } - if resp != nil { - panic(fmt.Sprintf("unexpected response: %#v", resp)) - } - - return nil -} - -func (r *raftState) lastIndex() uint64 { - return r.raft.LastIndex() -} - -func (r *raftState) snapshot() error { - future := r.raft.Snapshot() - return future.Error() -} - -// addPeer adds addr to the list of peers in the cluster. -func (r *raftState) addPeer(addr string) error { - peers, err := r.peerStore.Peers() - if err != nil { - return err - } - - for _, p := range peers { - if addr == p { - return nil - } - } - - if fut := r.raft.AddPeer(addr); fut.Error() != nil { - return fut.Error() - } - return nil -} - -// removePeer removes addr from the list of peers in the cluster. -func (r *raftState) removePeer(addr string) error { - // Only do this on the leader - if !r.isLeader() { - return raft.ErrNotLeader - } - - peers, err := r.peerStore.Peers() - if err != nil { - return err - } - - var exists bool - for _, p := range peers { - if addr == p { - exists = true - break - } - } - - if !exists { - return nil - } - - if fut := r.raft.RemovePeer(addr); fut.Error() != nil { - return fut.Error() - } - return nil -} - -func (r *raftState) peers() ([]string, error) { - return r.peerStore.Peers() -} - -func (r *raftState) leader() string { - if r.raft == nil { - return "" - } - - return r.raft.Leader() -} - -func (r *raftState) isLeader() bool { - if r.raft == nil { - return false - } - return r.raft.State() == raft.Leader -} - -// raftLayer wraps the connection so it can be re-used for forwarding. -type raftLayer struct { - addr *raftLayerAddr - ln net.Listener - conn chan net.Conn - closed chan struct{} -} - -type raftLayerAddr struct { - addr string -} - -func (r *raftLayerAddr) Network() string { - return "tcp" -} - -func (r *raftLayerAddr) String() string { - return r.addr -} - -// newRaftLayer returns a new instance of raftLayer. -func newRaftLayer(addr string, ln net.Listener) *raftLayer { - return &raftLayer{ - addr: &raftLayerAddr{addr}, - ln: ln, - conn: make(chan net.Conn), - closed: make(chan struct{}), - } -} - -// Addr returns the local address for the layer. -func (l *raftLayer) Addr() net.Addr { - return l.addr -} - -// Dial creates a new network connection. -func (l *raftLayer) Dial(addr string, timeout time.Duration) (net.Conn, error) { - conn, err := net.DialTimeout("tcp", addr, timeout) - if err != nil { - return nil, err - } - // Write a marker byte for raft messages. - _, err = conn.Write([]byte{MuxHeader}) - if err != nil { - conn.Close() - return nil, err - } - return conn, err -} - -// Accept waits for the next connection. -func (l *raftLayer) Accept() (net.Conn, error) { return l.ln.Accept() } - -// Close closes the layer. 
-func (l *raftLayer) Close() error { return l.ln.Close() } - -// peerStore is an in-memory implementation of raft.PeerStore -type peerStore struct { - mu sync.RWMutex - peers []string -} - -func (m *peerStore) Peers() ([]string, error) { - m.mu.RLock() - defer m.mu.RUnlock() - return m.peers, nil -} - -func (m *peerStore) SetPeers(peers []string) error { - m.mu.Lock() - defer m.mu.Unlock() - m.peers = peers - return nil -} diff --git a/services/meta/service.go b/services/meta/service.go deleted file mode 100644 index 52386ff53c8..00000000000 --- a/services/meta/service.go +++ /dev/null @@ -1,210 +0,0 @@ -package meta // import "github.com/influxdata/influxdb/services/meta" - -import ( - "crypto/tls" - "fmt" - "io/ioutil" - "log" - "net" - "net/http" - "os" - "strings" - "time" - - "github.com/influxdata/influxdb" -) - -const ( - MuxHeader = 8 -) - -type Service struct { - RaftListener net.Listener - - Version string - - config *Config - handler *handler - ln net.Listener - httpAddr string - raftAddr string - https bool - cert string - err chan error - Logger *log.Logger - store *store - - Node *influxdb.Node -} - -// NewService returns a new instance of Service. -func NewService(c *Config) *Service { - s := &Service{ - config: c, - httpAddr: c.HTTPBindAddress, - raftAddr: c.BindAddress, - https: c.HTTPSEnabled, - cert: c.HTTPSCertificate, - err: make(chan error), - } - if c.LoggingEnabled { - s.Logger = log.New(os.Stderr, "[meta] ", log.LstdFlags) - } else { - s.Logger = log.New(ioutil.Discard, "", 0) - } - - return s -} - -// Open starts the service -func (s *Service) Open() error { - s.Logger.Println("Starting meta service") - - if s.RaftListener == nil { - panic("no raft listener set") - } - - // Open listener. - if s.https { - cert, err := tls.LoadX509KeyPair(s.cert, s.cert) - if err != nil { - return err - } - - listener, err := tls.Listen("tcp", s.httpAddr, &tls.Config{ - Certificates: []tls.Certificate{cert}, - }) - if err != nil { - return err - } - - s.Logger.Println("Listening on HTTPS:", listener.Addr().String()) - s.ln = listener - } else { - listener, err := net.Listen("tcp", s.httpAddr) - if err != nil { - return err - } - - s.Logger.Println("Listening on HTTP:", listener.Addr().String()) - s.ln = listener - } - - // wait for the listeners to start - timeout := time.Now().Add(raftListenerStartupTimeout) - for { - if s.ln.Addr() != nil && s.RaftListener.Addr() != nil { - break - } - - if time.Now().After(timeout) { - return fmt.Errorf("unable to open without http listener running") - } - time.Sleep(10 * time.Millisecond) - } - - var err error - if autoAssignPort(s.httpAddr) { - s.httpAddr, err = combineHostAndAssignedPort(s.ln, s.httpAddr) - } - if autoAssignPort(s.raftAddr) { - s.raftAddr, err = combineHostAndAssignedPort(s.RaftListener, s.raftAddr) - } - if err != nil { - return err - } - - // Open the store. The addresses passed in are remotely accessible. - s.store = newStore(s.config, s.remoteAddr(s.httpAddr), s.remoteAddr(s.raftAddr)) - s.store.node = s.Node - - handler := newHandler(s.config, s) - handler.logger = s.Logger - handler.store = s.store - s.handler = handler - - // Begin listening for requests in a separate goroutine. 
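Service.Open above either binds a plain TCP listener or, with HTTPS enabled, loads a single combined cert-and-key PEM file and wraps the listener in TLS. A small sketch of that branch in isolation; openListener is an illustrative helper, and the example only exercises the plain path so it runs without a certificate on disk.

package main

import (
	"crypto/tls"
	"fmt"
	"net"
)

// openListener mirrors the meta service's listener setup: with TLS enabled it
// loads a combined cert+key PEM file and listens over TLS, otherwise it binds
// a plain TCP listener.
func openListener(addr, certFile string, useTLS bool) (net.Listener, error) {
	if !useTLS {
		return net.Listen("tcp", addr)
	}
	cert, err := tls.LoadX509KeyPair(certFile, certFile) // one file holds cert and key
	if err != nil {
		return nil, err
	}
	return tls.Listen("tcp", addr, &tls.Config{Certificates: []tls.Certificate{cert}})
}

func main() {
	ln, err := openListener("127.0.0.1:0", "", false)
	if err != nil {
		panic(err)
	}
	defer ln.Close()
	fmt.Println("listening on", ln.Addr())
}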
- go s.serve() - - if err := s.store.open(s.RaftListener); err != nil { - return err - } - - return nil -} - -func (s *Service) remoteAddr(addr string) string { - hostname := s.config.RemoteHostname - if hostname == "" { - hostname = DefaultHostname - } - remote, err := DefaultHost(hostname, addr) - if err != nil { - return addr - } - return remote -} - -// serve serves the handler from the listener. -func (s *Service) serve() { - // The listener was closed so exit - // See https://github.com/golang/go/issues/4373 - err := http.Serve(s.ln, s.handler) - if err != nil && !strings.Contains(err.Error(), "closed") { - s.err <- fmt.Errorf("listener failed: addr=%s, err=%s", s.ln.Addr(), err) - } -} - -// Close closes the underlying listener. -func (s *Service) Close() error { - if err := s.handler.Close(); err != nil { - return err - } - - if err := s.store.close(); err != nil { - return err - } - - if s.ln != nil { - if err := s.ln.Close(); err != nil { - return err - } - } - - return nil -} - -// HTTPAddr returns the bind address for the HTTP API -func (s *Service) HTTPAddr() string { - return s.httpAddr -} - -// RaftAddr returns the bind address for the Raft TCP listener -func (s *Service) RaftAddr() string { - return s.raftAddr -} - -// Err returns a channel for fatal errors that occur on the listener. -func (s *Service) Err() <-chan error { return s.err } - -// SetLogger sets the internal logger to the logger passed in. -func (s *Service) SetLogger(l *log.Logger) { - s.Logger = l -} - -func autoAssignPort(addr string) bool { - _, p, _ := net.SplitHostPort(addr) - return p == "0" -} - -func combineHostAndAssignedPort(ln net.Listener, autoAddr string) (string, error) { - host, _, err := net.SplitHostPort(autoAddr) - if err != nil { - return "", err - } - _, port, err := net.SplitHostPort(ln.Addr().String()) - if err != nil { - return "", err - } - return net.JoinHostPort(host, port), nil -} diff --git a/services/meta/service_test.go b/services/meta/service_test.go deleted file mode 100644 index 8d89646a15c..00000000000 --- a/services/meta/service_test.go +++ /dev/null @@ -1,1461 +0,0 @@ -package meta_test - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "net" - "os" - "path" - "reflect" - "runtime" - "strings" - "sync" - "testing" - "time" - - "github.com/influxdata/influxdb" - - "github.com/influxdata/influxdb/influxql" - "github.com/influxdata/influxdb/services/meta" - "github.com/influxdata/influxdb/tcp" - "github.com/influxdata/influxdb/toml" -) - -func TestMetaService_CreateDatabase(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - // Make sure a default retention policy was created. 
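autoAssignPort and combineHostAndAssignedPort above exist because a configured port of "0" makes the OS choose a free port at bind time, so the advertised address has to be rebuilt from the live listener. A minimal sketch of that round trip, assuming a loopback bind address.

package main

import (
	"fmt"
	"net"
)

func main() {
	configured := "127.0.0.1:0" // port 0 asks the OS for any free port

	ln, err := net.Listen("tcp", configured)
	if err != nil {
		panic(err)
	}
	defer ln.Close()

	// Keep the configured host, but take the port the OS actually assigned.
	host, _, err := net.SplitHostPort(configured)
	if err != nil {
		panic(err)
	}
	_, port, err := net.SplitHostPort(ln.Addr().String())
	if err != nil {
		panic(err)
	}

	fmt.Println("configured:", configured)
	fmt.Println("advertised:", net.JoinHostPort(host, port))
}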
- _, err = c.RetentionPolicy("db0", "default") - if err != nil { - t.Fatal(err) - } else if db.DefaultRetentionPolicy != "default" { - t.Fatalf("rp name wrong: %s", db.DefaultRetentionPolicy) - } -} - -func TestMetaService_CreateDatabaseIfNotExists(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } -} - -func TestMetaService_CreateDatabaseWithRetentionPolicy(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabaseWithRetentionPolicy("db0", &meta.RetentionPolicyInfo{ - Name: "rp0", - Duration: 1 * time.Hour, - ReplicaN: 1, - }); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - rp := db.RetentionPolicy("rp0") - if err != nil { - t.Fatal(err) - } else if rp.Name != "rp0" { - t.Fatalf("rp name wrong: %s", rp.Name) - } else if rp.Duration != time.Hour { - t.Fatalf("rp duration wrong: %s", rp.Duration.String()) - } else if rp.ReplicaN != 1 { - t.Fatalf("rp replication wrong: %d", rp.ReplicaN) - } -} - -func TestMetaService_Databases(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Create two databases. - db, err := c.CreateDatabase("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - db, err = c.CreateDatabase("db1") - if err != nil { - t.Fatal(err) - } else if db.Name != "db1" { - t.Fatalf("db name wrong: %s", db.Name) - } - - dbs, err := c.Databases() - if err != nil { - t.Fatal(err) - } - if len(dbs) != 2 { - t.Fatalf("expected 2 databases but got %d", len(dbs)) - } else if dbs[0].Name != "db0" { - t.Fatalf("db name wrong: %s", dbs[0].Name) - } else if dbs[1].Name != "db1" { - t.Fatalf("db name wrong: %s", dbs[1].Name) - } -} - -func TestMetaService_DropDatabase(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - if err := c.DropDatabase("db0"); err != nil { - t.Fatal(err) - } - - if db, _ = c.Database("db0"); db != nil { - t.Fatalf("expected database to not return: %v", db) - } - - // Dropping a database that does not exist is not an error. 
- if err := c.DropDatabase("db foo"); err != nil { - t.Fatalf("got %v error, but expected no error", err) - } -} - -func TestMetaService_CreateRetentionPolicy(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - if _, err := c.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{ - Name: "rp0", - Duration: 1 * time.Hour, - ReplicaN: 1, - }); err != nil { - t.Fatal(err) - } - - rp, err := c.RetentionPolicy("db0", "rp0") - if err != nil { - t.Fatal(err) - } else if rp.Name != "rp0" { - t.Fatalf("rp name wrong: %s", rp.Name) - } else if rp.Duration != time.Hour { - t.Fatalf("rp duration wrong: %s", rp.Duration.String()) - } else if rp.ReplicaN != 1 { - t.Fatalf("rp replication wrong: %d", rp.ReplicaN) - } - - // Create the same policy. Should not error. - if _, err := c.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{ - Name: "rp0", - Duration: 1 * time.Hour, - ReplicaN: 1, - }); err != nil { - t.Fatal(err) - } - - rp, err = c.RetentionPolicy("db0", "rp0") - if err != nil { - t.Fatal(err) - } else if rp.Name != "rp0" { - t.Fatalf("rp name wrong: %s", rp.Name) - } else if rp.Duration != time.Hour { - t.Fatalf("rp duration wrong: %s", rp.Duration.String()) - } else if rp.ReplicaN != 1 { - t.Fatalf("rp replication wrong: %d", rp.ReplicaN) - } -} - -func TestMetaService_SetDefaultRetentionPolicy(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabaseWithRetentionPolicy("db0", &meta.RetentionPolicyInfo{ - Name: "rp0", - Duration: 1 * time.Hour, - ReplicaN: 1, - }); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - rp, err := c.RetentionPolicy("db0", "rp0") - if err != nil { - t.Fatal(err) - } else if rp.Name != "rp0" { - t.Fatalf("rp name wrong: %s", rp.Name) - } else if rp.Duration != time.Hour { - t.Fatalf("rp duration wrong: %s", rp.Duration.String()) - } else if rp.ReplicaN != 1 { - t.Fatalf("rp replication wrong: %d", rp.ReplicaN) - } - - // Make sure default retention policy is now rp0 - if db.DefaultRetentionPolicy != "rp0" { - t.Fatalf("rp name wrong: %s", db.DefaultRetentionPolicy) - } -} - -func TestMetaService_DropRetentionPolicy(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - if _, err := c.CreateRetentionPolicy("db0", &meta.RetentionPolicyInfo{ - Name: "rp0", - Duration: 1 * time.Hour, - ReplicaN: 1, - }); err != nil { - t.Fatal(err) - } - - rp, err := c.RetentionPolicy("db0", "rp0") - if err != nil { - t.Fatal(err) - } else if rp.Name != "rp0" { - t.Fatalf("rp name wrong: %s", rp.Name) - } else if rp.Duration != time.Hour { - t.Fatalf("rp duration wrong: %s", rp.Duration.String()) - } else if rp.ReplicaN != 1 { - t.Fatalf("rp replication wrong: %d", rp.ReplicaN) - } - - if err := c.DropRetentionPolicy("db0", "rp0"); err != nil { - t.Fatal(err) - } - - rp, err = 
c.RetentionPolicy("db0", "rp0") - if err != nil { - t.Fatal(err) - } else if rp != nil { - t.Fatalf("rp should have been dropped") - } -} - -func TestMetaService_CreateUser(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Create an admin user - if _, err := c.CreateUser("fred", "supersecure", true); err != nil { - t.Fatal(err) - } - - // Create a non-admin user - if _, err := c.CreateUser("wilma", "password", false); err != nil { - t.Fatal(err) - } - - u, err := c.User("fred") - if err != nil { - t.Fatal(err) - } - if exp, got := "fred", u.Name; exp != got { - t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) - } - if !u.Admin { - t.Fatalf("expected user to be admin") - } - - u, err = c.Authenticate("fred", "supersecure") - if u == nil || err != nil || u.Name != "fred" { - t.Fatalf("failed to authenticate") - } - - // Auth for bad password should fail - u, err = c.Authenticate("fred", "badpassword") - if u != nil || err != meta.ErrAuthenticate { - t.Fatalf("authentication should fail with %s", meta.ErrAuthenticate) - } - - // Auth for no password should fail - u, err = c.Authenticate("fred", "") - if u != nil || err != meta.ErrAuthenticate { - t.Fatalf("authentication should fail with %s", meta.ErrAuthenticate) - } - - // Change password should succeed. - if err := c.UpdateUser("fred", "moresupersecure"); err != nil { - t.Fatal(err) - } - - // Auth for old password should fail - u, err = c.Authenticate("fred", "supersecure") - if u != nil || err != meta.ErrAuthenticate { - t.Fatalf("authentication should fail with %s", meta.ErrAuthenticate) - } - - // Auth for new password should succeed. - u, err = c.Authenticate("fred", "moresupersecure") - if u == nil || err != nil || u.Name != "fred" { - t.Fatalf("failed to authenticate") - } - - // Auth for unkonwn user should fail - u, err = c.Authenticate("foo", "") - if u != nil || err != meta.ErrUserNotFound { - t.Fatalf("authentication should fail with %s", meta.ErrUserNotFound) - } - - u, err = c.User("wilma") - if err != nil { - t.Fatal(err) - } - if exp, got := "wilma", u.Name; exp != got { - t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) - } - if u.Admin { - t.Fatalf("expected user not to be an admin") - } - - if exp, got := 2, c.UserCount(); exp != got { - t.Fatalf("unexpected user count. got: %d exp: %d", got, exp) - } - - // Grant privilidges to a non-admin user - if err := c.SetAdminPrivilege("wilma", true); err != nil { - t.Fatal(err) - } - - u, err = c.User("wilma") - if err != nil { - t.Fatal(err) - } - if exp, got := "wilma", u.Name; exp != got { - t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) - } - if !u.Admin { - t.Fatalf("expected user to be an admin") - } - - // Revoke privilidges from user - if err := c.SetAdminPrivilege("wilma", false); err != nil { - t.Fatal(err) - } - - u, err = c.User("wilma") - if err != nil { - t.Fatal(err) - } - if exp, got := "wilma", u.Name; exp != got { - t.Fatalf("unexpected user name: exp: %s got: %s", exp, got) - } - if u.Admin { - t.Fatalf("expected user not to be an admin") - } - - // Create a database to use for assiging privileges to. 
- if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - // Assign a single privilege at the database level - if err := c.SetPrivilege("wilma", "db0", influxql.ReadPrivilege); err != nil { - t.Fatal(err) - } - - p, err := c.UserPrivilege("wilma", "db0") - if err != nil { - t.Fatal(err) - } - if p == nil { - t.Fatal("expected privilege but was nil") - } - if exp, got := influxql.ReadPrivilege, *p; exp != got { - t.Fatalf("unexpected privilege. exp: %d, got: %d", exp, got) - } - - // Remove a single privilege at the database level - if err := c.SetPrivilege("wilma", "db0", influxql.NoPrivileges); err != nil { - t.Fatal(err) - } - p, err = c.UserPrivilege("wilma", "db0") - if err != nil { - t.Fatal(err) - } - if p == nil { - t.Fatal("expected privilege but was nil") - } - if exp, got := influxql.NoPrivileges, *p; exp != got { - t.Fatalf("unexpected privilege. exp: %d, got: %d", exp, got) - } - - // Drop a user - if err := c.DropUser("wilma"); err != nil { - t.Fatal(err) - } - - u, err = c.User("wilma") - if err != meta.ErrUserNotFound { - t.Fatalf("user lookup should fail with %s", meta.ErrUserNotFound) - } - - if exp, got := 1, c.UserCount(); exp != got { - t.Fatalf("unexpected user count. got: %d exp: %d", got, exp) - } -} - -func TestMetaService_ContinuousQueries(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Create a database to use - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - // Create a CQ - if err := c.CreateContinuousQuery("db0", "cq0", `SELECT count(value) INTO foo_count FROM foo GROUP BY time(10m)`); err != nil { - t.Fatal(err) - } - - // Recreate an existing CQ - if err := c.CreateContinuousQuery("db0", "cq0", `SELECT max(value) INTO foo_max FROM foo GROUP BY time(10m)`); err == nil || err.Error() != `continuous query already exists` { - t.Fatalf("unexpected error: %s", err) - } - - // Create a few more CQ's - if err := c.CreateContinuousQuery("db0", "cq1", `SELECT max(value) INTO foo_max FROM foo GROUP BY time(10m)`); err != nil { - t.Fatal(err) - } - if err := c.CreateContinuousQuery("db0", "cq2", `SELECT min(value) INTO foo_min FROM foo GROUP BY time(10m)`); err != nil { - t.Fatal(err) - } - - // Drop a single CQ - if err := c.DropContinuousQuery("db0", "cq1"); err != nil { - t.Fatal(err) - } -} - -func TestMetaService_Subscriptions_Create(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Create a database to use - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - db, err := c.Database("db0") - if err != nil { - t.Fatal(err) - } else if db.Name != "db0" { - t.Fatalf("db name wrong: %s", db.Name) - } - - // Create a subscription - if err := c.CreateSubscription("db0", "default", "sub0", "ALL", []string{"udp://example.com:9090"}); err != nil { - t.Fatal(err) - } - - // Re-create a subscription - if err := c.CreateSubscription("db0", "default", "sub0", "ALL", []string{"udp://example.com:9090"}); err == nil || err.Error() != `subscription already exists` { - t.Fatalf("unexpected error: %s", err) - } - - // Create another subscription. 
- if err := c.CreateSubscription("db0", "default", "sub1", "ALL", []string{"udp://example.com:6060"}); err != nil { - t.Fatal(err) - } -} - -func TestMetaService_Subscriptions_Drop(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Create a database to use - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - // DROP SUBSCRIPTION returns ErrSubscriptionNotFound when the - // subscription is unknown. - err := c.DropSubscription("db0", "default", "foo") - if got, exp := err, meta.ErrSubscriptionNotFound; got.Error() != exp.Error() { - t.Fatalf("got: %s, exp: %s", got, exp) - } - - // Create a subscription. - if err := c.CreateSubscription("db0", "default", "sub0", "ALL", []string{"udp://example.com:9090"}); err != nil { - t.Fatal(err) - } - - // DROP SUBSCRIPTION returns an influxdb.ErrDatabaseNotFound when - // the database is unknown. - err = c.DropSubscription("foo", "default", "sub0") - if got, exp := err, influxdb.ErrDatabaseNotFound("foo"); got.Error() != exp.Error() { - t.Fatalf("got: %s, exp: %s", got, exp) - } - - // DROP SUBSCRIPTION returns an influxdb.ErrRetentionPolicyNotFound - // when the retention policy is unknown. - err = c.DropSubscription("db0", "foo_policy", "sub0") - if got, exp := err, influxdb.ErrRetentionPolicyNotFound("foo_policy"); got.Error() != exp.Error() { - t.Fatalf("got: %s, exp: %s", got, exp) - } - - // DROP SUBSCRIPTION drops the subsciption if it can find it. - err = c.DropSubscription("db0", "default", "sub0") - if got := err; got != nil { - t.Fatalf("got: %s, exp: %v", got, nil) - } -} - -func TestMetaService_Shards(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - exp := &meta.NodeInfo{ - ID: 2, - Host: "foo:8180", - TCPHost: "bar:8281", - } - - if _, err := c.CreateDataNode(exp.Host, exp.TCPHost); err != nil { - t.Fatal(err) - } - - if _, err := c.CreateDatabase("db0"); err != nil { - t.Fatal(err) - } - - // Test creating a shard group. - tmin := time.Now() - sg, err := c.CreateShardGroup("db0", "default", tmin) - if err != nil { - t.Fatal(err) - } else if sg == nil { - t.Fatalf("expected ShardGroup") - } - - // Test pre-creating shard groups. - dur := sg.EndTime.Sub(sg.StartTime) + time.Nanosecond - tmax := tmin.Add(dur) - if err := c.PrecreateShardGroups(tmin, tmax); err != nil { - t.Fatal(err) - } - - // Test finding shard groups by time range. - groups, err := c.ShardGroupsByTimeRange("db0", "default", tmin, tmax) - if err != nil { - t.Fatal(err) - } else if len(groups) != 2 { - t.Fatalf("wrong number of shard groups: %d", len(groups)) - } - - // Test finding shard owner. - db, rp, owner := c.ShardOwner(groups[0].Shards[0].ID) - if db != "db0" { - t.Fatalf("wrong db name: %s", db) - } else if rp != "default" { - t.Fatalf("wrong rp name: %s", rp) - } else if owner.ID != groups[0].ID { - t.Fatalf("wrong owner: exp %d got %d", groups[0].ID, owner.ID) - } - - // Test deleting a shard group. 
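The shard test above widens the precreation window by one full shard group duration plus a nanosecond, so the range steps just past the current group's end and a second group is created. A tiny worked example of that arithmetic with assumed group bounds; real values come from sg.StartTime and sg.EndTime.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Assumed bounds for an existing shard group (a 1-week group).
	start := time.Date(2016, 3, 7, 0, 0, 0, 0, time.UTC)
	end := start.Add(7 * 24 * time.Hour)

	// One group duration plus a nanosecond steps past the current group's end.
	dur := end.Sub(start) + time.Nanosecond
	tmin := start
	tmax := tmin.Add(dur)

	fmt.Println("current group: ", start, "to", end)
	fmt.Println("precreate range:", tmin, "to", tmax) // covers the current group and the next one
}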
- if err := c.DeleteShardGroup("db0", "default", groups[0].ID); err != nil { - t.Fatal(err) - } else if groups, err = c.ShardGroupsByTimeRange("db0", "default", tmin, tmax); err != nil { - t.Fatal(err) - } else if len(groups) != 1 { - t.Fatalf("wrong number of shard groups after delete: %d", len(groups)) - } -} - -func TestMetaService_CreateRemoveMetaNode(t *testing.T) { - t.Parallel() - - joinPeers := freePorts(4) - raftPeers := freePorts(4) - - cfg1 := newConfig() - cfg1.HTTPBindAddress = joinPeers[0] - cfg1.BindAddress = raftPeers[0] - defer os.RemoveAll(cfg1.Dir) - cfg2 := newConfig() - cfg2.HTTPBindAddress = joinPeers[1] - cfg2.BindAddress = raftPeers[1] - defer os.RemoveAll(cfg2.Dir) - - var wg sync.WaitGroup - wg.Add(2) - cfg1.JoinPeers = joinPeers[0:2] - s1 := newService(cfg1) - go func() { - defer wg.Done() - if err := s1.Open(); err != nil { - t.Fatal(err) - } - }() - defer s1.Close() - - cfg2.JoinPeers = joinPeers[0:2] - s2 := newService(cfg2) - go func() { - defer wg.Done() - if err := s2.Open(); err != nil { - t.Fatal(err) - } - }() - defer s2.Close() - wg.Wait() - - cfg3 := newConfig() - joinPeers[2] = freePort() - cfg3.HTTPBindAddress = joinPeers[2] - raftPeers[2] = freePort() - cfg3.BindAddress = raftPeers[2] - defer os.RemoveAll(cfg3.Dir) - - cfg3.JoinPeers = joinPeers[0:3] - s3 := newService(cfg3) - if err := s3.Open(); err != nil { - t.Fatal(err) - } - defer s3.Close() - - c1 := meta.NewClient() - c1.SetMetaServers(joinPeers[0:3]) - if err := c1.Open(); err != nil { - t.Fatal(err) - } - defer c1.Close() - - metaNodes, _ := c1.MetaNodes() - if len(metaNodes) != 3 { - t.Fatalf("meta nodes wrong: %v", metaNodes) - } - - c := meta.NewClient() - c.SetMetaServers([]string{s1.HTTPAddr()}) - if err := c.Open(); err != nil { - t.Fatal(err) - } - defer c.Close() - - if err := c.DeleteMetaNode(3); err != nil { - t.Fatal(err) - } - - metaNodes, _ = c.MetaNodes() - if len(metaNodes) != 2 { - t.Fatalf("meta nodes wrong: %v", metaNodes) - } - - cfg4 := newConfig() - cfg4.HTTPBindAddress = freePort() - cfg4.BindAddress = freePort() - cfg4.JoinPeers = []string{joinPeers[0], joinPeers[1], cfg4.HTTPBindAddress} - defer os.RemoveAll(cfg4.Dir) - s4 := newService(cfg4) - if err := s4.Open(); err != nil { - t.Fatal(err) - } - defer s4.Close() - - c2 := meta.NewClient() - c2.SetMetaServers(cfg4.JoinPeers) - if err := c2.Open(); err != nil { - t.Fatal(err) - } - defer c2.Close() - - metaNodes, _ = c2.MetaNodes() - if len(metaNodes) != 3 { - t.Fatalf("meta nodes wrong: %v", metaNodes) - } -} - -// Ensure that if we attempt to create a database and the client -// is pointed at a server that isn't the leader, it automatically -// hits the leader and finishes the command -func TestMetaService_CommandAgainstNonLeader(t *testing.T) { - t.Parallel() - - cfgs := make([]*meta.Config, 3) - srvs := make([]*testService, 3) - joinPeers := freePorts(len(cfgs)) - - var wg sync.WaitGroup - wg.Add(len(cfgs)) - - for i, _ := range cfgs { - c := newConfig() - c.HTTPBindAddress = joinPeers[i] - c.JoinPeers = joinPeers - cfgs[i] = c - - srvs[i] = newService(c) - go func(srv *testService) { - defer wg.Done() - if err := srv.Open(); err != nil { - t.Fatal(err) - } - }(srvs[i]) - defer srvs[i].Close() - defer os.RemoveAll(c.Dir) - } - wg.Wait() - - for i := range cfgs { - c := meta.NewClient() - c.SetMetaServers([]string{joinPeers[i]}) - if err := c.Open(); err != nil { - t.Fatal(err) - } - defer c.Close() - - metaNodes, _ := c.MetaNodes() - if len(metaNodes) != 3 { - t.Fatalf("node %d - meta nodes wrong: %v", i, 
metaNodes) - } - - if _, err := c.CreateDatabase(fmt.Sprintf("foo%d", i)); err != nil { - t.Fatalf("node %d: %s", i, err) - } - - if db, err := c.Database(fmt.Sprintf("foo%d", i)); db == nil || err != nil { - t.Fatalf("node %d: database foo wasn't created: %s", i, err) - } - } -} - -// Ensure that the client will fail over to another server if the leader goes -// down. Also ensure that the cluster will come back up successfully after restart -func TestMetaService_FailureAndRestartCluster(t *testing.T) { - t.Parallel() - - cfgs := make([]*meta.Config, 3) - srvs := make([]*testService, 3) - joinPeers := freePorts(len(cfgs)) - raftPeers := freePorts(len(cfgs)) - - var swg sync.WaitGroup - swg.Add(len(cfgs)) - for i, _ := range cfgs { - c := newConfig() - c.HTTPBindAddress = joinPeers[i] - c.BindAddress = raftPeers[i] - c.JoinPeers = joinPeers - cfgs[i] = c - - srvs[i] = newService(c) - go func(i int, srv *testService) { - defer swg.Done() - if err := srv.Open(); err != nil { - t.Logf("opening server %d", i) - t.Fatal(err) - } - }(i, srvs[i]) - - defer srvs[i].Close() - defer os.RemoveAll(c.Dir) - } - swg.Wait() - - c := meta.NewClient() - c.SetMetaServers(joinPeers) - if err := c.Open(); err != nil { - t.Fatal(err) - } - defer c.Close() - - // check to see we were assigned a valid clusterID - c1ID := c.ClusterID() - if c1ID == 0 { - t.Fatalf("invalid cluster id: %d", c1ID) - } - - if _, err := c.CreateDatabase("foo"); err != nil { - t.Fatal(err) - } - - if db, err := c.Database("foo"); db == nil || err != nil { - t.Fatalf("database foo wasn't created: %s", err) - } - - if err := srvs[0].Close(); err != nil { - t.Fatal(err) - } - - if _, err := c.CreateDatabase("bar"); err != nil { - t.Fatal(err) - } - - if db, err := c.Database("bar"); db == nil || err != nil { - t.Fatalf("database bar wasn't created: %s", err) - } - - if err := srvs[1].Close(); err != nil { - t.Fatal(err) - } - if err := srvs[2].Close(); err != nil { - t.Fatal(err) - } - - // give them a second to shut down - time.Sleep(time.Second) - - // need to start them all at once so they can discover the bind addresses for raft - var wg sync.WaitGroup - wg.Add(len(cfgs)) - for i, cfg := range cfgs { - srvs[i] = newService(cfg) - go func(srv *testService) { - if err := srv.Open(); err != nil { - panic(err) - } - wg.Done() - }(srvs[i]) - defer srvs[i].Close() - } - wg.Wait() - time.Sleep(time.Second) - - c2 := meta.NewClient() - c2.SetMetaServers(joinPeers) - if err := c2.Open(); err != nil { - t.Fatal(err) - } - defer c2.Close() - - c2ID := c2.ClusterID() - if c1ID != c2ID { - t.Fatalf("invalid cluster id. got: %d, exp: %d", c2ID, c1ID) - } - - if db, err := c2.Database("bar"); db == nil || err != nil { - t.Fatalf("database bar wasn't created: %s", err) - } - - if _, err := c2.CreateDatabase("asdf"); err != nil { - t.Fatal(err) - } - - if db, err := c2.Database("asdf"); db == nil || err != nil { - t.Fatalf("database bar wasn't created: %s", err) - } -} - -// Ensures that everything works after a host name change. This is -// skipped by default. 
To enable add hosts foobar and asdf to your -// /etc/hosts file and point those to 127.0.0.1 -func TestMetaService_NameChangeSingleNode(t *testing.T) { - t.Skip("not enabled") - t.Parallel() - - cfg := newConfig() - defer os.RemoveAll(cfg.Dir) - cfg.BindAddress = "foobar:0" - cfg.HTTPBindAddress = "foobar:0" - s := newService(cfg) - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - - c := meta.NewClient() - c.SetMetaServers([]string{s.HTTPAddr()}) - if err := c.Open(); err != nil { - t.Fatal(err) - } - defer c.Close() - - if _, err := c.CreateDatabase("foo"); err != nil { - t.Fatal(err) - } - - s.Close() - time.Sleep(time.Second) - - cfg.BindAddress = "asdf" + ":" + strings.Split(s.RaftAddr(), ":")[1] - cfg.HTTPBindAddress = "asdf" + ":" + strings.Split(s.HTTPAddr(), ":")[1] - s = newService(cfg) - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - - c2 := meta.NewClient() - c2.SetMetaServers([]string{s.HTTPAddr()}) - if err := c2.Open(); err != nil { - t.Fatal(err) - } - defer c2.Close() - - db, err := c2.Database("foo") - if db == nil || err != nil { - t.Fatal(err) - } - - nodes, err := c2.MetaNodes() - if err != nil { - t.Fatal(err) - } - exp := []meta.NodeInfo{{ID: 1, Host: cfg.HTTPBindAddress, TCPHost: cfg.BindAddress}} - - time.Sleep(10 * time.Second) - if !reflect.DeepEqual(nodes, exp) { - t.Fatalf("nodes don't match: %v", nodes) - } -} - -func TestMetaService_CreateDataNode(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - exp := &meta.NodeInfo{ - ID: 1, - Host: "foo:8180", - TCPHost: "bar:8281", - } - - n, err := c.CreateDataNode(exp.Host, exp.TCPHost) - if err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(n, exp) { - t.Fatalf("data node attributes wrong: %v", n) - } - - nodes, err := c.DataNodes() - if err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(nodes, []meta.NodeInfo{*exp}) { - t.Fatalf("nodes wrong: %v", nodes) - } -} - -func TestMetaService_DropDataNode(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Dropping a data node with an invalid ID returns an error - if err := c.DeleteDataNode(0); err == nil { - t.Fatalf("Didn't get an error but expected %s", meta.ErrNodeNotFound) - } else if err.Error() != meta.ErrNodeNotFound.Error() { - t.Fatalf("got %v, expected %v", err, meta.ErrNodeNotFound) - } - - // Create a couple of nodes. - n1, err := c.CreateDataNode("foo:8180", "bar:8181") - if err != nil { - t.Fatal(err) - } - - n2, err := c.CreateDataNode("foo:8280", "bar:8281") - if err != nil { - t.Fatal(err) - } - - // Create a database and shard group. The default retention policy - // means that the created shards should be replicated (owned) by - // both the data nodes. - if _, err := c.CreateDatabase("foo"); err != nil { - t.Fatal(err) - } - - sg, err := c.CreateShardGroup("foo", "default", time.Now()) - if err != nil { - t.Fatal(err) - } - - // Dropping the first data server should result in that node ID - // being removed as an owner of the shard. - if err := c.DeleteDataNode(n1.ID); err != nil { - t.Fatal(err) - } - - // Retrieve updated shard group data from the Meta Store. 
- rp, _ := c.RetentionPolicy("foo", "default") - sg = &rp.ShardGroups[0] - - // The first data node should be removed as an owner of the shard on - // the shard group - if !reflect.DeepEqual(sg.Shards[0].Owners, []meta.ShardOwner{{n2.ID}}) { - t.Errorf("owners for shard are %v, expected %v", sg.Shards[0].Owners, []meta.ShardOwner{{2}}) - } - - // The shard group should still be marked as active because it still - // has a shard with owners. - if sg.Deleted() { - t.Error("shard group marked as deleted, but shouldn't be") - } - - // Dropping the second data node will orphan the shard, but as - // there won't be any shards left in the shard group, the shard - // group will be deleted. - if err := c.DeleteDataNode(n2.ID); err != nil { - t.Fatal(err) - } - - // Retrieve updated data. - rp, _ = c.RetentionPolicy("foo", "default") - sg = &rp.ShardGroups[0] - - if got, exp := sg.Deleted(), true; got != exp { - t.Error("Shard group not marked as deleted") - } -} - -func TestMetaService_DropDataNode_Reassign(t *testing.T) { - t.Parallel() - - d, s, c := newServiceAndClient() - defer os.RemoveAll(d) - defer s.Close() - defer c.Close() - - // Create a couple of nodes. - n1, err := c.CreateDataNode("foo:8180", "bar:8181") - if err != nil { - t.Fatal(err) - } - - n2, err := c.CreateDataNode("foo:8280", "bar:8281") - if err != nil { - t.Fatal(err) - } - - // Create a retention policy with a replica factor of 1. - rp := meta.NewRetentionPolicyInfo("rp0") - rp.ReplicaN = 1 - - // Create a database using rp0 - if _, err := c.CreateDatabaseWithRetentionPolicy("foo", rp); err != nil { - t.Fatal(err) - } - - sg, err := c.CreateShardGroup("foo", "rp0", time.Now()) - if err != nil { - t.Fatal(err) - } - - // Dropping the first data server should result in the shard being - // reassigned to the other node. - if err := c.DeleteDataNode(n1.ID); err != nil { - t.Fatal(err) - } - - // Retrieve updated shard group data from the Meta Store. - rp, _ = c.RetentionPolicy("foo", "rp0") - sg = &rp.ShardGroups[0] - - // There should still be two shards. - if got, exp := len(sg.Shards), 2; got != exp { - t.Errorf("there are %d shards, but should be %d", got, exp) - } - - // The second data node should be the owner of both shards. - for _, s := range sg.Shards { - if !reflect.DeepEqual(s.Owners, []meta.ShardOwner{{n2.ID}}) { - t.Errorf("owners for shard are %v, expected %v", s.Owners, []meta.ShardOwner{{2}}) - } - } - - // The shard group should not be marked as deleted because both - // shards have an owner. 
- if sg.Deleted() { - t.Error("shard group marked as deleted, but shouldn't be") - } -} - -func TestMetaService_PersistClusterIDAfterRestart(t *testing.T) { - t.Parallel() - - cfg := newConfig() - defer os.RemoveAll(cfg.Dir) - s := newService(cfg) - if err := s.Open(); err != nil { - t.Fatal(err) - } - defer s.Close() - - c := meta.NewClient() - c.SetMetaServers([]string{s.HTTPAddr()}) - if err := c.Open(); err != nil { - t.Fatal(err) - } - id := c.ClusterID() - if id == 0 { - t.Fatal("cluster ID can't be zero") - } - - s.Close() - s = newService(cfg) - if err := s.Open(); err != nil { - t.Fatal(err) - } - - c = meta.NewClient() - c.SetMetaServers([]string{s.HTTPAddr()}) - if err := c.Open(); err != nil { - t.Fatal(err) - } - defer c.Close() - - idAfter := c.ClusterID() - if idAfter == 0 { - t.Fatal("cluster ID can't be zero") - } else if idAfter != id { - t.Fatalf("cluster id not the same: %d, %d", idAfter, id) - } -} - -func TestMetaService_Ping(t *testing.T) { - cfgs := make([]*meta.Config, 3) - srvs := make([]*testService, 3) - joinPeers := freePorts(len(cfgs)) - - var swg sync.WaitGroup - swg.Add(len(cfgs)) - - for i, _ := range cfgs { - c := newConfig() - c.HTTPBindAddress = joinPeers[i] - c.JoinPeers = joinPeers - cfgs[i] = c - - srvs[i] = newService(c) - go func(i int, srv *testService) { - defer swg.Done() - if err := srv.Open(); err != nil { - t.Fatalf("error opening server %d: %s", i, err) - } - }(i, srvs[i]) - defer srvs[i].Close() - defer os.RemoveAll(c.Dir) - } - swg.Wait() - - c := meta.NewClient() - c.SetMetaServers(joinPeers) - if err := c.Open(); err != nil { - t.Fatal(err) - } - defer c.Close() - - if err := c.Ping(false); err != nil { - t.Fatalf("ping false all failed: %s", err) - } - if err := c.Ping(true); err != nil { - t.Fatalf("ping false true failed: %s", err) - } - - srvs[1].Close() - // give the server time to close - time.Sleep(time.Second) - - if err := c.Ping(false); err != nil { - t.Fatalf("ping false some failed: %s", err) - } - - if err := c.Ping(true); err == nil { - t.Fatal("expected error on ping") - } -} - -func TestMetaService_AcquireLease(t *testing.T) { - t.Parallel() - - d, s, c1 := newServiceAndClient() - c2 := newClient(s) - defer os.RemoveAll(d) - defer s.Close() - defer c1.Close() - defer c2.Close() - - n1, err := c1.CreateDataNode("foo1:8180", "bar1:8281") - if err != nil { - t.Fatal(err) - } - - n2, err := c2.CreateDataNode("foo2:8180", "bar2:8281") - if err != nil { - t.Fatal(err) - } - - // Client 1 acquires a lease. Should succeed. - l, err := c1.AcquireLease("foo") - if err != nil { - t.Fatal(err) - } else if l == nil { - t.Fatal("expected *Lease") - } else if l.Name != "foo" { - t.Fatalf("lease name wrong: %s", l.Name) - } else if l.Owner != n1.ID { - t.Fatalf("owner ID wrong. exp %d got %d", n1.ID, l.Owner) - } - - // Client 2 attempts to acquire the same lease. Should fail. - l, err = c2.AcquireLease("foo") - if err == nil { - t.Fatal("expected to fail because another node owns the lease") - } - - // Wait for Client 1's lease to expire. - time.Sleep(1 * time.Second) - - // Client 2 retries to acquire the lease. Should succeed this time. - l, err = c2.AcquireLease("foo") - if err != nil { - t.Fatal(err) - } else if l == nil { - t.Fatal("expected *Lease") - } else if l.Name != "foo" { - t.Fatalf("lease name wrong: %s", l.Name) - } else if l.Owner != n2.ID { - t.Fatalf("owner ID wrong. exp %d got %d", n2.ID, l.Owner) - } -} - -// newServiceAndClient returns new data directory, *Service, and *Client or panics. 
-// Caller is responsible for deleting data dir and closing client. -func newServiceAndClient() (string, *testService, *meta.Client) { - cfg := newConfig() - s := newService(cfg) - if err := s.Open(); err != nil { - panic(err) - } - - c := newClient(s) - - return cfg.Dir, s, c -} - -func newClient(s *testService) *meta.Client { - c := meta.NewClient() - c.SetMetaServers([]string{s.HTTPAddr()}) - if err := c.Open(); err != nil { - panic(err) - } - return c -} - -func newConfig() *meta.Config { - cfg := meta.NewConfig() - cfg.BindAddress = "127.0.0.1:0" - cfg.HTTPBindAddress = "127.0.0.1:0" - cfg.Dir = testTempDir(2) - cfg.LeaseDuration = toml.Duration(1 * time.Second) - return cfg -} - -func testTempDir(skip int) string { - // Get name of the calling function. - pc, _, _, ok := runtime.Caller(skip) - if !ok { - panic("failed to get name of test function") - } - _, prefix := path.Split(runtime.FuncForPC(pc).Name()) - // Make a temp dir prefixed with calling function's name. - dir, err := ioutil.TempDir(os.TempDir(), prefix) - if err != nil { - panic(err) - } - return dir -} - -type testService struct { - *meta.Service - ln net.Listener -} - -func (t *testService) Close() error { - if err := t.Service.Close(); err != nil { - return err - } - return t.ln.Close() -} - -func newService(cfg *meta.Config) *testService { - // Open shared TCP connection. - ln, err := net.Listen("tcp", cfg.BindAddress) - if err != nil { - panic(err) - } - - // Multiplex listener. - mux := tcp.NewMux() - - if err != nil { - panic(err) - } - s := meta.NewService(cfg) - s.Node = influxdb.NewNode(cfg.Dir) - s.RaftListener = mux.Listen(meta.MuxHeader) - - go mux.Serve(ln) - - return &testService{Service: s, ln: ln} -} - -func mustParseStatement(s string) influxql.Statement { - stmt, err := influxql.ParseStatement(s) - if err != nil { - panic(err) - } - return stmt -} - -func mustMarshalJSON(v interface{}) string { - b, e := json.Marshal(v) - if e != nil { - panic(e) - } - return string(b) -} - -func freePort() string { - l, _ := net.Listen("tcp", "127.0.0.1:0") - defer l.Close() - return l.Addr().String() -} - -func freePorts(i int) []string { - var ports []string - for j := 0; j < i; j++ { - ports = append(ports, freePort()) - } - return ports -} diff --git a/services/meta/store.go b/services/meta/store.go deleted file mode 100644 index 45f2c4197e1..00000000000 --- a/services/meta/store.go +++ /dev/null @@ -1,450 +0,0 @@ -package meta - -import ( - "errors" - "fmt" - "io/ioutil" - "log" - "math/rand" - "net" - "os" - "sync" - "time" - - "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/services/meta/internal" - - "github.com/gogo/protobuf/proto" - "github.com/hashicorp/raft" -) - -// Retention policy settings. -const ( - autoCreateRetentionPolicyName = "default" - autoCreateRetentionPolicyPeriod = 0 - - // maxAutoCreatedRetentionPolicyReplicaN is the maximum replication factor that will - // be set for auto-created retention policies. - maxAutoCreatedRetentionPolicyReplicaN = 3 -) - -// Raft configuration. 
-const ( - raftListenerStartupTimeout = time.Second -) - -type store struct { - mu sync.RWMutex - closing chan struct{} - - config *Config - data *Data - raftState *raftState - dataChanged chan struct{} - path string - opened bool - logger *log.Logger - - raftAddr string - httpAddr string - - node *influxdb.Node -} - -// newStore will create a new metastore with the passed in config -func newStore(c *Config, httpAddr, raftAddr string) *store { - s := store{ - data: &Data{ - Index: 1, - }, - closing: make(chan struct{}), - dataChanged: make(chan struct{}), - path: c.Dir, - config: c, - httpAddr: httpAddr, - raftAddr: raftAddr, - } - if c.LoggingEnabled { - s.logger = log.New(os.Stderr, "[metastore] ", log.LstdFlags) - } else { - s.logger = log.New(ioutil.Discard, "", 0) - } - - return &s -} - -// open opens and initializes the raft store. -func (s *store) open(raftln net.Listener) error { - s.logger.Printf("Using data dir: %v", s.path) - - joinPeers, err := s.filterAddr(s.config.JoinPeers, s.httpAddr) - if err != nil { - return err - } - joinPeers = s.config.JoinPeers - - var initializePeers []string - if len(joinPeers) > 0 { - c := NewClient() - c.SetMetaServers(joinPeers) - c.SetTLS(s.config.HTTPSEnabled) - for { - peers := c.peers() - if !Peers(peers).Contains(s.raftAddr) { - peers = append(peers, s.raftAddr) - } - if len(s.config.JoinPeers)-len(peers) == 0 { - initializePeers = peers - break - } - - if len(peers) > len(s.config.JoinPeers) { - s.logger.Printf("waiting for join peers to match config specified. found %v, config specified %v", peers, s.config.JoinPeers) - } else { - s.logger.Printf("Waiting for %d join peers. Have %v. Asking nodes: %v", len(s.config.JoinPeers)-len(peers), peers, joinPeers) - } - time.Sleep(time.Second) - } - } - - if err := s.setOpen(); err != nil { - return err - } - - // Create the root directory if it doesn't already exist. - if err := os.MkdirAll(s.path, 0777); err != nil { - return fmt.Errorf("mkdir all: %s", err) - } - - // Open the raft store. - if err := s.openRaft(initializePeers, raftln); err != nil { - return fmt.Errorf("raft: %s", err) - } - - if len(joinPeers) > 0 { - c := NewClient() - c.SetMetaServers(joinPeers) - c.SetTLS(s.config.HTTPSEnabled) - if err := c.Open(); err != nil { - return err - } - defer c.Close() - - n, err := c.JoinMetaServer(s.httpAddr, s.raftAddr) - if err != nil { - return err - } - s.node.ID = n.ID - if err := s.node.Save(); err != nil { - return err - } - - } - - // Wait for a leader to be elected so we know the raft log is loaded - // and up to date - if err := s.waitForLeader(0); err != nil { - return err - } - - // Make sure this server is in the list of metanodes - peers, err := s.raftState.peers() - if err != nil { - return err - } - if len(peers) <= 1 { - // we have to loop here because if the hostname has changed - // raft will take a little bit to normalize so that this host - // will be marked as the leader - for { - err := s.setMetaNode(s.httpAddr, s.raftAddr) - if err == nil { - break - } - time.Sleep(100 * time.Millisecond) - } - } - - return nil -} - -func (s *store) setOpen() error { - s.mu.Lock() - defer s.mu.Unlock() - // Check if store has already been opened. 
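store.open above retries setMetaNode on a fixed interval until raft settles on this node as leader after a hostname change. A generic version of that retry loop; retryUntil is an illustrative helper, and the deadline is added so the sketch terminates on its own rather than looping forever like the original.

package main

import (
	"errors"
	"fmt"
	"time"
)

// retryUntil calls attempt on a fixed interval until it succeeds or the
// overall timeout elapses.
func retryUntil(attempt func() error, interval, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for {
		if err := attempt(); err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timed out waiting for success")
		}
		time.Sleep(interval)
	}
}

func main() {
	calls := 0
	err := retryUntil(func() error {
		calls++
		if calls < 3 {
			return errors.New("not leader yet") // simulated transient failure
		}
		return nil
	}, 100*time.Millisecond, time.Second)
	fmt.Println("attempts:", calls, "err:", err)
}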
- if s.opened { - return ErrStoreOpen - } - s.opened = true - return nil -} - -// peers returns the raft peers known to this store -func (s *store) peers() []string { - s.mu.RLock() - defer s.mu.RUnlock() - if s.raftState == nil { - return []string{s.raftAddr} - } - peers, err := s.raftState.peers() - if err != nil { - return []string{s.raftAddr} - } - return peers -} - -func (s *store) filterAddr(addrs []string, filter string) ([]string, error) { - host, port, err := net.SplitHostPort(filter) - if err != nil { - return nil, err - } - - ip, err := net.ResolveIPAddr("ip", host) - if err != nil { - return nil, err - } - - var joinPeers []string - for _, addr := range addrs { - joinHost, joinPort, err := net.SplitHostPort(addr) - if err != nil { - return nil, err - } - - joinIp, err := net.ResolveIPAddr("ip", joinHost) - if err != nil { - return nil, err - } - - // Don't allow joining ourselves - if ip.String() == joinIp.String() && port == joinPort { - continue - } - joinPeers = append(joinPeers, addr) - } - return joinPeers, nil -} - -func (s *store) openRaft(initializePeers []string, raftln net.Listener) error { - s.mu.Lock() - defer s.mu.Unlock() - rs := newRaftState(s.config, s.raftAddr) - rs.logger = s.logger - rs.path = s.path - - if err := rs.open(s, raftln, initializePeers); err != nil { - return err - } - s.raftState = rs - - return nil -} - -func (s *store) close() error { - s.mu.Lock() - defer s.mu.Unlock() - - select { - case <-s.closing: - // already closed - return nil - default: - close(s.closing) - return s.raftState.close() - } -} - -func (s *store) snapshot() (*Data, error) { - s.mu.RLock() - defer s.mu.RUnlock() - return s.data.Clone(), nil -} - -// afterIndex returns a channel that will be closed to signal -// the caller when an updated snapshot is available. -func (s *store) afterIndex(index uint64) <-chan struct{} { - s.mu.RLock() - defer s.mu.RUnlock() - - if index < s.data.Index { - // Client needs update so return a closed channel. - ch := make(chan struct{}) - close(ch) - return ch - } - - return s.dataChanged -} - -// WaitForLeader sleeps until a leader is found or a timeout occurs. -// timeout == 0 means to wait forever. -func (s *store) waitForLeader(timeout time.Duration) error { - // Begin timeout timer. - timer := time.NewTimer(timeout) - defer timer.Stop() - - // Continually check for leader until timeout. - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() - for { - select { - case <-s.closing: - return errors.New("closing") - case <-timer.C: - if timeout != 0 { - return errors.New("timeout") - } - case <-ticker.C: - if s.leader() != "" { - return nil - } - } - } -} - -// isLeader returns true if the store is currently the leader. -func (s *store) isLeader() bool { - s.mu.RLock() - defer s.mu.RUnlock() - if s.raftState == nil { - return false - } - return s.raftState.raft.State() == raft.Leader -} - -// leader returns what the store thinks is the current leader. An empty -// string indicates no leader exists. 
-func (s *store) leader() string { - s.mu.RLock() - defer s.mu.RUnlock() - if s.raftState == nil || s.raftState.raft == nil { - return "" - } - return s.raftState.raft.Leader() -} - -// leaderHTTP returns the HTTP API connection info for the metanode -// that is the raft leader -func (s *store) leaderHTTP() string { - s.mu.RLock() - defer s.mu.RUnlock() - if s.raftState == nil { - return "" - } - l := s.raftState.raft.Leader() - - for _, n := range s.data.MetaNodes { - if n.TCPHost == l { - return n.Host - } - } - - return "" -} - -// otherMetaServersHTTP will return the HTTP bind addresses of the other -// meta servers in the cluster -func (s *store) otherMetaServersHTTP() []string { - s.mu.RLock() - defer s.mu.RUnlock() - - var a []string - for _, n := range s.data.MetaNodes { - if n.TCPHost != s.raftAddr { - a = append(a, n.Host) - } - } - return a -} - -// index returns the current store index. -func (s *store) index() uint64 { - s.mu.RLock() - defer s.mu.RUnlock() - return s.data.Index -} - -// apply applies a command to raft. -func (s *store) apply(b []byte) error { - if s.raftState == nil { - return fmt.Errorf("store not open") - } - return s.raftState.apply(b) -} - -// join adds a new server to the metaservice and raft -func (s *store) join(n *NodeInfo) (*NodeInfo, error) { - s.mu.RLock() - if s.raftState == nil { - s.mu.RUnlock() - return nil, fmt.Errorf("store not open") - } - if err := s.raftState.addPeer(n.TCPHost); err != nil { - s.mu.RUnlock() - return nil, err - } - s.mu.RUnlock() - - if err := s.createMetaNode(n.Host, n.TCPHost); err != nil { - return nil, err - } - - s.mu.RLock() - defer s.mu.RUnlock() - for _, node := range s.data.MetaNodes { - if node.TCPHost == n.TCPHost && node.Host == n.Host { - return &node, nil - } - } - return nil, ErrNodeNotFound -} - -// leave removes a server from the metaservice and raft -func (s *store) leave(n *NodeInfo) error { - return s.raftState.removePeer(n.TCPHost) -} - -// createMetaNode is used by the join command to create the metanode int -// the metastore -func (s *store) createMetaNode(addr, raftAddr string) error { - val := &internal.CreateMetaNodeCommand{ - HTTPAddr: proto.String(addr), - TCPAddr: proto.String(raftAddr), - Rand: proto.Uint64(uint64(rand.Int63())), - } - t := internal.Command_CreateMetaNodeCommand - cmd := &internal.Command{Type: &t} - if err := proto.SetExtension(cmd, internal.E_CreateMetaNodeCommand_Command, val); err != nil { - panic(err) - } - - b, err := proto.Marshal(cmd) - if err != nil { - return err - } - - return s.apply(b) -} - -// setMetaNode is used when the raft group has only a single peer. It will -// either create a metanode or update the information for the one metanode -// that is there. 
It's used because hostnames can change -func (s *store) setMetaNode(addr, raftAddr string) error { - val := &internal.SetMetaNodeCommand{ - HTTPAddr: proto.String(addr), - TCPAddr: proto.String(raftAddr), - Rand: proto.Uint64(uint64(rand.Int63())), - } - t := internal.Command_SetMetaNodeCommand - cmd := &internal.Command{Type: &t} - if err := proto.SetExtension(cmd, internal.E_SetMetaNodeCommand_Command, val); err != nil { - panic(err) - } - - b, err := proto.Marshal(cmd) - if err != nil { - return err - } - - return s.apply(b) -} diff --git a/services/meta/store_fsm.go b/services/meta/store_fsm.go deleted file mode 100644 index 02deda466b7..00000000000 --- a/services/meta/store_fsm.go +++ /dev/null @@ -1,654 +0,0 @@ -package meta - -import ( - "fmt" - "io" - "io/ioutil" - "time" - - "github.com/gogo/protobuf/proto" - "github.com/hashicorp/raft" - "github.com/influxdata/influxdb/influxql" - "github.com/influxdata/influxdb/services/meta/internal" -) - -// storeFSM represents the finite state machine used by Store to interact with Raft. -type storeFSM store - -func (fsm *storeFSM) Apply(l *raft.Log) interface{} { - var cmd internal.Command - if err := proto.Unmarshal(l.Data, &cmd); err != nil { - panic(fmt.Errorf("cannot marshal command: %x", l.Data)) - } - - // Lock the store. - s := (*store)(fsm) - s.mu.Lock() - defer s.mu.Unlock() - - err := func() interface{} { - switch cmd.GetType() { - case internal.Command_RemovePeerCommand: - return fsm.applyRemovePeerCommand(&cmd) - case internal.Command_CreateNodeCommand: - // create node was in < 0.10.0 servers, we need the peers - // list to convert to the appropriate data/meta nodes now - peers, err := s.raftState.peers() - if err != nil { - return err - } - return fsm.applyCreateNodeCommand(&cmd, peers) - case internal.Command_DeleteNodeCommand: - return fsm.applyDeleteNodeCommand(&cmd) - case internal.Command_CreateDatabaseCommand: - return fsm.applyCreateDatabaseCommand(&cmd) - case internal.Command_DropDatabaseCommand: - return fsm.applyDropDatabaseCommand(&cmd) - case internal.Command_CreateRetentionPolicyCommand: - return fsm.applyCreateRetentionPolicyCommand(&cmd) - case internal.Command_DropRetentionPolicyCommand: - return fsm.applyDropRetentionPolicyCommand(&cmd) - case internal.Command_SetDefaultRetentionPolicyCommand: - return fsm.applySetDefaultRetentionPolicyCommand(&cmd) - case internal.Command_UpdateRetentionPolicyCommand: - return fsm.applyUpdateRetentionPolicyCommand(&cmd) - case internal.Command_CreateShardGroupCommand: - return fsm.applyCreateShardGroupCommand(&cmd) - case internal.Command_DeleteShardGroupCommand: - return fsm.applyDeleteShardGroupCommand(&cmd) - case internal.Command_CreateContinuousQueryCommand: - return fsm.applyCreateContinuousQueryCommand(&cmd) - case internal.Command_DropContinuousQueryCommand: - return fsm.applyDropContinuousQueryCommand(&cmd) - case internal.Command_CreateSubscriptionCommand: - return fsm.applyCreateSubscriptionCommand(&cmd) - case internal.Command_DropSubscriptionCommand: - return fsm.applyDropSubscriptionCommand(&cmd) - case internal.Command_CreateUserCommand: - return fsm.applyCreateUserCommand(&cmd) - case internal.Command_DropUserCommand: - return fsm.applyDropUserCommand(&cmd) - case internal.Command_UpdateUserCommand: - return fsm.applyUpdateUserCommand(&cmd) - case internal.Command_SetPrivilegeCommand: - return fsm.applySetPrivilegeCommand(&cmd) - case internal.Command_SetAdminPrivilegeCommand: - return fsm.applySetAdminPrivilegeCommand(&cmd) - case 
internal.Command_SetDataCommand: - return fsm.applySetDataCommand(&cmd) - case internal.Command_UpdateNodeCommand: - return fsm.applyUpdateNodeCommand(&cmd) - case internal.Command_CreateMetaNodeCommand: - return fsm.applyCreateMetaNodeCommand(&cmd) - case internal.Command_DeleteMetaNodeCommand: - return fsm.applyDeleteMetaNodeCommand(&cmd, s) - case internal.Command_SetMetaNodeCommand: - return fsm.applySetMetaNodeCommand(&cmd) - case internal.Command_CreateDataNodeCommand: - return fsm.applyCreateDataNodeCommand(&cmd) - case internal.Command_DeleteDataNodeCommand: - return fsm.applyDeleteDataNodeCommand(&cmd) - default: - panic(fmt.Errorf("cannot apply command: %x", l.Data)) - } - }() - - // Copy term and index to new metadata. - fsm.data.Term = l.Term - fsm.data.Index = l.Index - - // signal that the data changed - close(s.dataChanged) - s.dataChanged = make(chan struct{}) - - return err -} - -func (fsm *storeFSM) applyRemovePeerCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_RemovePeerCommand_Command) - v := ext.(*internal.RemovePeerCommand) - - addr := v.GetAddr() - - // Only do this if you are the leader - if fsm.raftState.isLeader() { - //Remove that node from the peer - fsm.logger.Printf("removing peer: %s", addr) - if err := fsm.raftState.removePeer(addr); err != nil { - fsm.logger.Printf("error removing peer: %s", err) - } - } - - return nil -} - -func (fsm *storeFSM) applyCreateNodeCommand(cmd *internal.Command, peers []string) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateNodeCommand_Command) - v := ext.(*internal.CreateNodeCommand) - - // Copy data and update. - other := fsm.data.Clone() - - // CreateNode is a command from < 0.10.0 clusters. Every node in - // those clusters would be a data node and only the nodes that are - // in the list of peers would be meta nodes - isMeta := false - for _, p := range peers { - if v.GetHost() == p { - isMeta = true - break - } - } - - if isMeta { - if err := other.CreateMetaNode(v.GetHost(), v.GetHost()); err != nil { - return err - } - } - - // Get the only meta node - if len(other.MetaNodes) == 1 { - metaNode := other.MetaNodes[0] - - if err := other.SetDataNode(metaNode.ID, v.GetHost(), v.GetHost()); err != nil { - return err - } - } else { - if err := other.CreateDataNode(v.GetHost(), v.GetHost()); err != nil { - return err - } - } - - // If the cluster ID hasn't been set then use the command's random number. - if other.ClusterID == 0 { - other.ClusterID = uint64(v.GetRand()) - } - - fsm.data = other - return nil -} - -// applyUpdateNodeCommand was in < 0.10.0, noop this now -func (fsm *storeFSM) applyUpdateNodeCommand(cmd *internal.Command) interface{} { - return nil -} - -func (fsm *storeFSM) applyUpdateDataNodeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateNodeCommand_Command) - v := ext.(*internal.UpdateDataNodeCommand) - - // Copy data and update. - other := fsm.data.Clone() - - node := other.DataNode(v.GetID()) - if node == nil { - return ErrNodeNotFound - } - - node.Host = v.GetHost() - node.TCPHost = v.GetTCPHost() - - fsm.data = other - return nil -} - -// applyDeleteNodeCommand is from < 0.10.0. 
no op for this one -func (fsm *storeFSM) applyDeleteNodeCommand(cmd *internal.Command) interface{} { - return nil -} - -func (fsm *storeFSM) applyCreateDatabaseCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateDatabaseCommand_Command) - v := ext.(*internal.CreateDatabaseCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.CreateDatabase(v.GetName()); err != nil { - return err - } - - s := (*store)(fsm) - if rpi := v.GetRetentionPolicy(); rpi != nil { - if err := other.CreateRetentionPolicy(v.GetName(), &RetentionPolicyInfo{ - Name: rpi.GetName(), - ReplicaN: int(rpi.GetReplicaN()), - Duration: time.Duration(rpi.GetDuration()), - ShardGroupDuration: time.Duration(rpi.GetShardGroupDuration()), - }); err != nil { - if err == ErrRetentionPolicyExists { - return ErrRetentionPolicyConflict - } - return err - } - - // Set it as the default retention policy. - if err := other.SetDefaultRetentionPolicy(v.GetName(), rpi.GetName()); err != nil { - return err - } - } else if s.config.RetentionAutoCreate { - // Read node count. - // Retention policies must be fully replicated. - replicaN := len(other.DataNodes) - if replicaN > maxAutoCreatedRetentionPolicyReplicaN { - replicaN = maxAutoCreatedRetentionPolicyReplicaN - } else if replicaN < 1 { - replicaN = 1 - } - - // Create a retention policy. - rpi := NewRetentionPolicyInfo(autoCreateRetentionPolicyName) - rpi.ReplicaN = replicaN - rpi.Duration = autoCreateRetentionPolicyPeriod - if err := other.CreateRetentionPolicy(v.GetName(), rpi); err != nil { - return err - } - - // Set it as the default retention policy. - if err := other.SetDefaultRetentionPolicy(v.GetName(), autoCreateRetentionPolicyName); err != nil { - return err - } - } - - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyDropDatabaseCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DropDatabaseCommand_Command) - v := ext.(*internal.DropDatabaseCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.DropDatabase(v.GetName()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyCreateRetentionPolicyCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateRetentionPolicyCommand_Command) - v := ext.(*internal.CreateRetentionPolicyCommand) - pb := v.GetRetentionPolicy() - - // Copy data and update. - other := fsm.data.Clone() - if err := other.CreateRetentionPolicy(v.GetDatabase(), - &RetentionPolicyInfo{ - Name: pb.GetName(), - ReplicaN: int(pb.GetReplicaN()), - Duration: time.Duration(pb.GetDuration()), - ShardGroupDuration: time.Duration(pb.GetShardGroupDuration()), - }); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyDropRetentionPolicyCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DropRetentionPolicyCommand_Command) - v := ext.(*internal.DropRetentionPolicyCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.DropRetentionPolicy(v.GetDatabase(), v.GetName()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applySetDefaultRetentionPolicyCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_SetDefaultRetentionPolicyCommand_Command) - v := ext.(*internal.SetDefaultRetentionPolicyCommand) - - // Copy data and update. 
- other := fsm.data.Clone() - if err := other.SetDefaultRetentionPolicy(v.GetDatabase(), v.GetName()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyUpdateRetentionPolicyCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_UpdateRetentionPolicyCommand_Command) - v := ext.(*internal.UpdateRetentionPolicyCommand) - - // Create update object. - rpu := RetentionPolicyUpdate{Name: v.NewName} - if v.Duration != nil { - value := time.Duration(v.GetDuration()) - rpu.Duration = &value - } - if v.ReplicaN != nil { - value := int(v.GetReplicaN()) - rpu.ReplicaN = &value - } - - // Copy data and update. - other := fsm.data.Clone() - if err := other.UpdateRetentionPolicy(v.GetDatabase(), v.GetName(), &rpu); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyCreateShardGroupCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateShardGroupCommand_Command) - v := ext.(*internal.CreateShardGroupCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.CreateShardGroup(v.GetDatabase(), v.GetPolicy(), time.Unix(0, v.GetTimestamp())); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyDeleteShardGroupCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DeleteShardGroupCommand_Command) - v := ext.(*internal.DeleteShardGroupCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.DeleteShardGroup(v.GetDatabase(), v.GetPolicy(), v.GetShardGroupID()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyCreateContinuousQueryCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateContinuousQueryCommand_Command) - v := ext.(*internal.CreateContinuousQueryCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.CreateContinuousQuery(v.GetDatabase(), v.GetName(), v.GetQuery()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyDropContinuousQueryCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DropContinuousQueryCommand_Command) - v := ext.(*internal.DropContinuousQueryCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.DropContinuousQuery(v.GetDatabase(), v.GetName()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyCreateSubscriptionCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateSubscriptionCommand_Command) - v := ext.(*internal.CreateSubscriptionCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.CreateSubscription(v.GetDatabase(), v.GetRetentionPolicy(), v.GetName(), v.GetMode(), v.GetDestinations()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyDropSubscriptionCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DropSubscriptionCommand_Command) - v := ext.(*internal.DropSubscriptionCommand) - - // Copy data and update. 
- other := fsm.data.Clone() - if err := other.DropSubscription(v.GetDatabase(), v.GetRetentionPolicy(), v.GetName()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyCreateUserCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateUserCommand_Command) - v := ext.(*internal.CreateUserCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.CreateUser(v.GetName(), v.GetHash(), v.GetAdmin()); err != nil { - return err - } - fsm.data = other - - return nil -} - -func (fsm *storeFSM) applyDropUserCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DropUserCommand_Command) - v := ext.(*internal.DropUserCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.DropUser(v.GetName()); err != nil { - return err - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) applyUpdateUserCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_UpdateUserCommand_Command) - v := ext.(*internal.UpdateUserCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.UpdateUser(v.GetName(), v.GetHash()); err != nil { - return err - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) applySetPrivilegeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_SetPrivilegeCommand_Command) - v := ext.(*internal.SetPrivilegeCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.SetPrivilege(v.GetUsername(), v.GetDatabase(), influxql.Privilege(v.GetPrivilege())); err != nil { - return err - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) applySetAdminPrivilegeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_SetAdminPrivilegeCommand_Command) - v := ext.(*internal.SetAdminPrivilegeCommand) - - // Copy data and update. - other := fsm.data.Clone() - if err := other.SetAdminPrivilege(v.GetUsername(), v.GetAdmin()); err != nil { - return err - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) applySetDataCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_SetDataCommand_Command) - v := ext.(*internal.SetDataCommand) - - // Overwrite data. - fsm.data = &Data{} - fsm.data.unmarshal(v.GetData()) - - return nil -} - -func (fsm *storeFSM) applyCreateMetaNodeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateMetaNodeCommand_Command) - v := ext.(*internal.CreateMetaNodeCommand) - - other := fsm.data.Clone() - other.CreateMetaNode(v.GetHTTPAddr(), v.GetTCPAddr()) - - // If the cluster ID hasn't been set then use the command's random number. - if other.ClusterID == 0 { - other.ClusterID = uint64(v.GetRand()) - } - - fsm.data = other - return nil -} - -func (fsm *storeFSM) applySetMetaNodeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_SetMetaNodeCommand_Command) - v := ext.(*internal.SetMetaNodeCommand) - - other := fsm.data.Clone() - other.SetMetaNode(v.GetHTTPAddr(), v.GetTCPAddr()) - - // If the cluster ID hasn't been set then use the command's random number. 
- if other.ClusterID == 0 { - other.ClusterID = uint64(v.GetRand()) - } - - fsm.data = other - return nil -} - -func (fsm *storeFSM) applyDeleteMetaNodeCommand(cmd *internal.Command, s *store) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DeleteMetaNodeCommand_Command) - v := ext.(*internal.DeleteMetaNodeCommand) - - other := fsm.data.Clone() - node := other.MetaNode(v.GetID()) - if node == nil { - return ErrNodeNotFound - } - - if err := s.leave(node); err != nil && err != raft.ErrNotLeader { - return err - } - - if err := other.DeleteMetaNode(v.GetID()); err != nil { - return err - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) applyCreateDataNodeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_CreateDataNodeCommand_Command) - v := ext.(*internal.CreateDataNodeCommand) - - other := fsm.data.Clone() - - // Get the only meta node - if len(other.MetaNodes) == 1 && len(other.DataNodes) == 0 { - metaNode := other.MetaNodes[0] - - if err := other.SetDataNode(metaNode.ID, v.GetHTTPAddr(), v.GetTCPAddr()); err != nil { - return err - } - } else { - other.CreateDataNode(v.GetHTTPAddr(), v.GetTCPAddr()) - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) applyDeleteDataNodeCommand(cmd *internal.Command) interface{} { - ext, _ := proto.GetExtension(cmd, internal.E_DeleteDataNodeCommand_Command) - v := ext.(*internal.DeleteDataNodeCommand) - - other := fsm.data.Clone() - if err := other.DeleteDataNode(v.GetID()); err != nil { - return err - } - fsm.data = other - return nil -} - -func (fsm *storeFSM) Snapshot() (raft.FSMSnapshot, error) { - s := (*store)(fsm) - s.mu.Lock() - defer s.mu.Unlock() - - return &storeFSMSnapshot{Data: (*store)(fsm).data}, nil -} - -func (fsm *storeFSM) Restore(r io.ReadCloser) error { - // Read all bytes. - b, err := ioutil.ReadAll(r) - if err != nil { - return err - } - - // Decode metadata. - data := &Data{} - if err := data.UnmarshalBinary(b); err != nil { - return err - } - - // Set metadata on store. - // NOTE: No lock because Hashicorp Raft doesn't call Restore concurrently - // with any other function. - fsm.data = data - - return nil -} - -type storeFSMSnapshot struct { - Data *Data -} - -func (s *storeFSMSnapshot) Persist(sink raft.SnapshotSink) error { - err := func() error { - // Encode data. - p, err := s.Data.MarshalBinary() - if err != nil { - return err - } - - // Write data to sink. - if _, err := sink.Write(p); err != nil { - return err - } - - // Close the sink. - if err := sink.Close(); err != nil { - return err - } - - return nil - }() - - if err != nil { - sink.Cancel() - return err - } - - return nil -} - -// Release is invoked when we are finished with the snapshot -func (s *storeFSMSnapshot) Release() {} diff --git a/services/opentsdb/handler.go b/services/opentsdb/handler.go index 2c8f0e22b90..0c959d68346 100644 --- a/services/opentsdb/handler.go +++ b/services/opentsdb/handler.go @@ -19,9 +19,8 @@ import ( // Handler is an http.Handler for the service. type Handler struct { - Database string - RetentionPolicy string - ConsistencyLevel cluster.ConsistencyLevel + Database string + RetentionPolicy string PointsWriter interface { WritePoints(p *cluster.WritePointsRequest) error @@ -124,10 +123,9 @@ func (h *Handler) servePut(w http.ResponseWriter, r *http.Request) { // Write points. 
if err := h.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: h.Database, - RetentionPolicy: h.RetentionPolicy, - ConsistencyLevel: h.ConsistencyLevel, - Points: points, + Database: h.Database, + RetentionPolicy: h.RetentionPolicy, + Points: points, }); influxdb.IsClientError(err) { h.Logger.Println("write series error: ", err) http.Error(w, "write series error: "+err.Error(), http.StatusBadRequest) diff --git a/services/opentsdb/service.go b/services/opentsdb/service.go index bd2a2714127..ea239a068c7 100644 --- a/services/opentsdb/service.go +++ b/services/opentsdb/service.go @@ -57,10 +57,9 @@ type Service struct { tls bool cert string - BindAddress string - Database string - RetentionPolicy string - ConsistencyLevel cluster.ConsistencyLevel + BindAddress string + Database string + RetentionPolicy string PointsWriter interface { WritePoints(p *cluster.WritePointsRequest) error @@ -82,25 +81,19 @@ type Service struct { // NewService returns a new instance of Service. func NewService(c Config) (*Service, error) { - consistencyLevel, err := cluster.ParseConsistencyLevel(c.ConsistencyLevel) - if err != nil { - return nil, err - } - s := &Service{ - done: make(chan struct{}), - tls: c.TLSEnabled, - cert: c.Certificate, - err: make(chan error), - BindAddress: c.BindAddress, - Database: c.Database, - RetentionPolicy: c.RetentionPolicy, - ConsistencyLevel: consistencyLevel, - batchSize: c.BatchSize, - batchPending: c.BatchPending, - batchTimeout: time.Duration(c.BatchTimeout), - Logger: log.New(os.Stderr, "[opentsdb] ", log.LstdFlags), - LogPointErrors: c.LogPointErrors, + done: make(chan struct{}), + tls: c.TLSEnabled, + cert: c.Certificate, + err: make(chan error), + BindAddress: c.BindAddress, + Database: c.Database, + RetentionPolicy: c.RetentionPolicy, + batchSize: c.BatchSize, + batchPending: c.BatchPending, + batchTimeout: time.Duration(c.BatchTimeout), + Logger: log.New(os.Stderr, "[opentsdb] ", log.LstdFlags), + LogPointErrors: c.LogPointErrors, } return s, nil } @@ -357,12 +350,11 @@ func (s *Service) handleTelnetConn(conn net.Conn) { // serveHTTP handles connections in HTTP format. 
func (s *Service) serveHTTP() { srv := &http.Server{Handler: &Handler{ - Database: s.Database, - RetentionPolicy: s.RetentionPolicy, - ConsistencyLevel: s.ConsistencyLevel, - PointsWriter: s.PointsWriter, - Logger: s.Logger, - statMap: s.statMap, + Database: s.Database, + RetentionPolicy: s.RetentionPolicy, + PointsWriter: s.PointsWriter, + Logger: s.Logger, + statMap: s.statMap, }} srv.Serve(s.httpln) } @@ -374,10 +366,9 @@ func (s *Service) processBatches(batcher *tsdb.PointBatcher) { select { case batch := <-batcher.Out(): if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.Database, - RetentionPolicy: s.RetentionPolicy, - ConsistencyLevel: s.ConsistencyLevel, - Points: batch, + Database: s.Database, + RetentionPolicy: s.RetentionPolicy, + Points: batch, }); err == nil { s.statMap.Add(statBatchesTrasmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) diff --git a/services/udp/service.go b/services/udp/service.go index 33b3442587b..25aff0bc20f 100644 --- a/services/udp/service.go +++ b/services/udp/service.go @@ -129,10 +129,9 @@ func (s *Service) writer() { select { case batch := <-s.batcher.Out(): if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.config.Database, - RetentionPolicy: s.config.RetentionPolicy, - ConsistencyLevel: cluster.ConsistencyLevelOne, - Points: batch, + Database: s.config.Database, + RetentionPolicy: s.config.RetentionPolicy, + Points: batch, }); err == nil { s.statMap.Add(statBatchesTrasmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) diff --git a/tsdb/config.go b/tsdb/config.go index 12992a369a4..41d3226ce0c 100644 --- a/tsdb/config.go +++ b/tsdb/config.go @@ -72,9 +72,8 @@ const ( // Config holds the configuration for the tsbd package. type Config struct { - Enabled bool `toml:"enabled"` - Dir string `toml:"dir"` - Engine string `toml:"engine"` + Dir string `toml:"dir"` + Engine string `toml:"engine"` // WAL config options for b1 (introduced in 0.9.2) MaxWALSize int `toml:"max-wal-size"` @@ -107,7 +106,6 @@ type Config struct { func NewConfig() Config { return Config{ Engine: DefaultEngine, - Enabled: true, // data node enabled by default MaxWALSize: DefaultMaxWALSize, WALFlushInterval: toml.Duration(DefaultWALFlushInterval), WALPartitionFlushDelay: toml.Duration(DefaultWALPartitionFlushDelay), diff --git a/tsdb/config_test.go b/tsdb/config_test.go index 946364785a8..320d6a95e26 100644 --- a/tsdb/config_test.go +++ b/tsdb/config_test.go @@ -16,9 +16,5 @@ enabled = false t.Fatal(err) } - // Validate configuration. - if c.Enabled == true { - t.Fatalf("unexpected enabled: %v", c.Enabled) - } // TODO: add remaining config tests } From cd84f26c340939d381c92adbe78aef0973576459 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Tue, 8 Mar 2016 15:19:10 -0600 Subject: [PATCH 08/14] remove startup check for monitoring --- cmd/influxd/run/config.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cmd/influxd/run/config.go b/cmd/influxd/run/config.go index 5d7e9123809..60a651646e5 100644 --- a/cmd/influxd/run/config.go +++ b/cmd/influxd/run/config.go @@ -137,12 +137,6 @@ func (c *Config) Validate() error { return err } - // If the config is for a meta-only node, we can't store monitor stats - // locally. 
- if c.Monitor.StoreEnabled { - return fmt.Errorf("monitor storage can not be enabled on meta only nodes") - } - if err := c.Data.Validate(); err != nil { return err } From 43e345cf4d2def0b9269f2ff9eb84e6235fa5621 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Tue, 8 Mar 2016 15:42:50 -0600 Subject: [PATCH 09/14] fix error in write --- cluster/points_writer.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cluster/points_writer.go b/cluster/points_writer.go index 36438bc7374..700cd34e7a1 100644 --- a/cluster/points_writer.go +++ b/cluster/points_writer.go @@ -252,6 +252,11 @@ func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPo w.statMap.Add(statPointWriteReqLocal, int64(len(points))) err := w.TSDBStore.WriteToShard(shard.ID, points) + if err == nil { + w.statMap.Add(statWriteOK, 1) + return nil + } + // If we've written to shard that should exist on the current node, but the store has // not actually created this shard, tell it to create it and retry the write if err == tsdb.ErrShardNotFound { From e2e231310e057833918061940424495578003779 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Thu, 10 Mar 2016 13:40:26 -0600 Subject: [PATCH 10/14] make work with p-meta --- services/meta/data.go | 34 +++++++++++++++++----------------- services/meta/data_test.go | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/services/meta/data.go b/services/meta/data.go index fcce424b275..5b0e100f4b7 100644 --- a/services/meta/data.go +++ b/services/meta/data.go @@ -38,11 +38,11 @@ type Data struct { MaxShardID uint64 } -// newShardOwner sets the owner of the provided shard to the data node +// NewShardOwner sets the owner of the provided shard to the data node // that currently owns the fewest number of shards. If multiple nodes // own the same (fewest) number of shards, then one of those nodes // becomes the new shard owner. -func newShardOwner(s ShardInfo, ownerFreqs map[int]int) (uint64, error) { +func NewShardOwner(s ShardInfo, ownerFreqs map[int]int) (uint64, error) { var ( minId = -1 minFreq int @@ -495,6 +495,19 @@ func (data *Data) UpdateUser(name, hash string) error { return ErrUserNotFound } +// CloneUserInfos returns a copy of the user infos +func (data *Data) CloneUserInfos() []UserInfo { + if len(data.Users) == 0 { + return []UserInfo{} + } + users := make([]UserInfo, len(data.Users)) + for i := range data.Users { + users[i] = data.Users[i].clone() + } + + return users +} + // SetPrivilege sets a privilege for a user on a database. func (data *Data) SetPrivilege(name, database string, p influxql.Privilege) error { ui := data.User(name) @@ -552,21 +565,8 @@ func (data *Data) UserPrivilege(name, database string) (*influxql.Privilege, err func (data *Data) Clone() *Data { other := *data - // Deep copy databases. - if data.Databases != nil { - other.Databases = make([]DatabaseInfo, len(data.Databases)) - for i := range data.Databases { - other.Databases[i] = data.Databases[i].clone() - } - } - - // Copy users. 
- if data.Users != nil { - other.Users = make([]UserInfo, len(data.Users)) - for i := range data.Users { - other.Users[i] = data.Users[i].clone() - } - } + other.Databases = data.CloneDatabases() + other.Users = data.CloneUserInfos() return &other } diff --git a/services/meta/data_test.go b/services/meta/data_test.go index 50117d7e3aa..39150af1ca0 100644 --- a/services/meta/data_test.go +++ b/services/meta/data_test.go @@ -8,13 +8,13 @@ import ( func TestnewShardOwner(t *testing.T) { // An error is returned if there are no data nodes available. - _, err := newShardOwner(ShardInfo{}, map[int]int{}) + _, err := NewShardOwner(ShardInfo{}, map[int]int{}) if err == nil { t.Error("got no error, but expected one") } ownerFreqs := map[int]int{1: 15, 2: 11, 3: 12} - id, err := newShardOwner(ShardInfo{ID: 4}, ownerFreqs) + id, err := NewShardOwner(ShardInfo{ID: 4}, ownerFreqs) if err != nil { t.Fatal(err) } From d024ca255227de1c7520d9d8a93a91b2455291d2 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Thu, 10 Mar 2016 17:22:22 -0600 Subject: [PATCH 11/14] modify WritePoints function signature for p products --- cluster/points_writer.go | 27 +++++++++------------ cluster/points_writer_test.go | 5 ++-- cmd/influxd/run/server.go | 6 +---- cmd/influxd/run/server_test.go | 3 ++- services/collectd/service.go | 9 ++----- services/collectd/service_test.go | 19 +++++++-------- services/graphite/service.go | 10 +++----- services/graphite/service_test.go | 39 +++++++++++++++---------------- services/httpd/handler.go | 15 +++--------- services/opentsdb/handler.go | 9 ++----- services/opentsdb/service.go | 9 ++----- services/opentsdb/service_test.go | 38 ++++++++++++++---------------- services/udp/service.go | 9 ++----- 13 files changed, 77 insertions(+), 121 deletions(-) diff --git a/cluster/points_writer.go b/cluster/points_writer.go index 700cd34e7a1..ca1f7624bb4 100644 --- a/cluster/points_writer.go +++ b/cluster/points_writer.go @@ -181,30 +181,25 @@ func (w *PointsWriter) MapShards(wp *WritePointsRequest) (*ShardMapping, error) // WritePointsInto is a copy of WritePoints that uses a tsdb structure instead of // a cluster structure for information. This is to avoid a circular dependency func (w *PointsWriter) WritePointsInto(p *IntoWriteRequest) error { - req := WritePointsRequest{ - Database: p.Database, - RetentionPolicy: p.RetentionPolicy, - Points: p.Points, - } - return w.WritePoints(&req) + return w.WritePoints(p.Database, p.RetentionPolicy, models.ConsistencyLevelAny, p.Points) } // WritePoints writes across multiple local and remote data nodes according the consistency level. 
-func (w *PointsWriter) WritePoints(p *WritePointsRequest) error { +func (w *PointsWriter) WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { w.statMap.Add(statWriteReq, 1) - w.statMap.Add(statPointWriteReq, int64(len(p.Points))) + w.statMap.Add(statPointWriteReq, int64(len(points))) - if p.RetentionPolicy == "" { - db, err := w.MetaClient.Database(p.Database) + if retentionPolicy == "" { + db, err := w.MetaClient.Database(database) if err != nil { return err } else if db == nil { - return influxdb.ErrDatabaseNotFound(p.Database) + return influxdb.ErrDatabaseNotFound(database) } - p.RetentionPolicy = db.DefaultRetentionPolicy + retentionPolicy = db.DefaultRetentionPolicy } - shardMappings, err := w.MapShards(p) + shardMappings, err := w.MapShards(&WritePointsRequest{Database: database, RetentionPolicy: retentionPolicy, Points: points}) if err != nil { return err } @@ -214,8 +209,8 @@ func (w *PointsWriter) WritePoints(p *WritePointsRequest) error { ch := make(chan error, len(shardMappings.Points)) for shardID, points := range shardMappings.Points { go func(shard *meta.ShardInfo, database, retentionPolicy string, points []models.Point) { - ch <- w.writeToShard(shard, p.Database, p.RetentionPolicy, points) - }(shardMappings.Shards[shardID], p.Database, p.RetentionPolicy, points) + ch <- w.writeToShard(shard, database, retentionPolicy, points) + }(shardMappings.Shards[shardID], database, retentionPolicy, points) } // Send points to subscriptions if possible. @@ -223,7 +218,7 @@ func (w *PointsWriter) WritePoints(p *WritePointsRequest) error { // We need to lock just in case the channel is about to be nil'ed w.mu.RLock() select { - case w.subPoints <- p: + case w.subPoints <- &WritePointsRequest{Database: database, RetentionPolicy: retentionPolicy, Points: points}: ok = true default: } diff --git a/cluster/points_writer_test.go b/cluster/points_writer_test.go index 96086bdd05c..7af8869f8bf 100644 --- a/cluster/points_writer_test.go +++ b/cluster/points_writer_test.go @@ -2,6 +2,7 @@ package cluster_test import ( "fmt" + "reflect" "sync" "sync/atomic" "testing" @@ -233,7 +234,7 @@ func TestPointsWriter_WritePoints(t *testing.T) { c.Open() defer c.Close() - err := c.WritePoints(pr) + err := c.WritePoints(pr.Database, pr.RetentionPolicy, models.ConsistencyLevelAny, pr.Points) if err == nil && test.expErr != nil { t.Errorf("PointsWriter.WritePoints(): '%s' error: got %v, exp %v", test.name, err, test.expErr) } @@ -247,7 +248,7 @@ func TestPointsWriter_WritePoints(t *testing.T) { if test.expErr == nil { select { case p := <-subPoints: - if p != pr { + if !reflect.DeepEqual(p, pr) { t.Errorf("PointsWriter.WritePoints(): '%s' error: unexpected WritePointsRequest got %v, exp %v", test.name, p, pr) } default: diff --git a/cmd/influxd/run/server.go b/cmd/influxd/run/server.go index 56d1bd411f5..ee0a85b39e7 100644 --- a/cmd/influxd/run/server.go +++ b/cmd/influxd/run/server.go @@ -620,11 +620,7 @@ func (a *tcpaddr) String() string { return a.host } type monitorPointsWriter cluster.PointsWriter func (pw *monitorPointsWriter) WritePoints(database, retentionPolicy string, points models.Points) error { - return (*cluster.PointsWriter)(pw).WritePoints(&cluster.WritePointsRequest{ - Database: database, - RetentionPolicy: retentionPolicy, - Points: points, - }) + return (*cluster.PointsWriter)(pw).WritePoints(database, retentionPolicy, models.ConsistencyLevelAny, points) } func (s *Server) remoteAddr(addr string) string { diff --git 
a/cmd/influxd/run/server_test.go b/cmd/influxd/run/server_test.go index 63a0d1d4fa1..8d378b56b7a 100644 --- a/cmd/influxd/run/server_test.go +++ b/cmd/influxd/run/server_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/influxdata/influxdb/cluster" + "github.com/influxdata/influxdb/models" ) // Ensure that HTTP responses include the InfluxDB version. @@ -5563,7 +5564,7 @@ func TestServer_ConcurrentPointsWriter_Subscriber(t *testing.T) { Database: "db0", RetentionPolicy: "rp0", } - s.PointsWriter.WritePoints(wpr) + s.PointsWriter.WritePoints(wpr.Database, wpr.RetentionPolicy, models.ConsistencyLevelAny, wpr.Points) } } }() diff --git a/services/collectd/service.go b/services/collectd/service.go index 685413c73cb..2bb36120fe2 100644 --- a/services/collectd/service.go +++ b/services/collectd/service.go @@ -11,7 +11,6 @@ import ( "time" "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/tsdb" @@ -34,7 +33,7 @@ const ( // pointsWriter is an internal interface to make testing easier. type pointsWriter interface { - WritePoints(p *cluster.WritePointsRequest) error + WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } // metaStore is an internal interface to make testing easier. @@ -244,11 +243,7 @@ func (s *Service) writePoints() { case <-s.stop: return case batch := <-s.batcher.Out(): - if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.Config.Database, - RetentionPolicy: s.Config.RetentionPolicy, - Points: batch, - }); err == nil { + if err := s.PointsWriter.WritePoints(s.Config.Database, s.Config.RetentionPolicy, models.ConsistencyLevelAny, batch); err == nil { s.statMap.Add(statBatchesTrasmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) } else { diff --git a/services/collectd/service_test.go b/services/collectd/service_test.go index d475b0aff32..bbd41863537 100644 --- a/services/collectd/service_test.go +++ b/services/collectd/service_test.go @@ -9,7 +9,6 @@ import ( "testing" "time" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/toml" @@ -56,12 +55,12 @@ func TestService_BatchSize(t *testing.T) { pointCh := make(chan models.Point) s.MetaClient.CreateDatabaseIfNotExistsFn = func(name string) (*meta.DatabaseInfo, error) { return nil, nil } - s.PointsWriter.WritePointsFn = func(req *cluster.WritePointsRequest) error { - if len(req.Points) != batchSize { - t.Errorf("\n\texp = %d\n\tgot = %d\n", batchSize, len(req.Points)) + s.PointsWriter.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { + if len(points) != batchSize { + t.Errorf("\n\texp = %d\n\tgot = %d\n", batchSize, len(points)) } - for _, p := range req.Points { + for _, p := range points { pointCh <- p } return nil @@ -125,8 +124,8 @@ func TestService_BatchDuration(t *testing.T) { pointCh := make(chan models.Point, 1000) s.MetaClient.CreateDatabaseIfNotExistsFn = func(name string) (*meta.DatabaseInfo, error) { return nil, nil } - s.PointsWriter.WritePointsFn = func(req *cluster.WritePointsRequest) error { - for _, p := range req.Points { + s.PointsWriter.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { + for _, p := 
range points { pointCh <- p } return nil @@ -209,11 +208,11 @@ func newTestService(batchSize int, batchDuration time.Duration) *testService { } type testPointsWriter struct { - WritePointsFn func(*cluster.WritePointsRequest) error + WritePointsFn func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } -func (w *testPointsWriter) WritePoints(p *cluster.WritePointsRequest) error { - return w.WritePointsFn(p) +func (w *testPointsWriter) WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { + return w.WritePointsFn(database, retentionPolicy, consistencyLevel, points) } type testMetaClient struct { diff --git a/services/graphite/service.go b/services/graphite/service.go index c756c8b0084..4bc1131887d 100644 --- a/services/graphite/service.go +++ b/services/graphite/service.go @@ -13,7 +13,7 @@ import ( "time" "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/cluster" + "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/monitor/diagnostics" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/tsdb" @@ -80,7 +80,7 @@ type Service struct { DeregisterDiagnosticsClient(name string) } PointsWriter interface { - WritePoints(p *cluster.WritePointsRequest) error + WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } MetaClient interface { CreateDatabase(name string) (*meta.DatabaseInfo, error) @@ -353,11 +353,7 @@ func (s *Service) processBatches(batcher *tsdb.PointBatcher) { for { select { case batch := <-batcher.Out(): - if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.database, - RetentionPolicy: "", - Points: batch, - }); err == nil { + if err := s.PointsWriter.WritePoints(s.database, "", models.ConsistencyLevelAny, batch); err == nil { s.statMap.Add(statBatchesTransmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) } else { diff --git a/services/graphite/service_test.go b/services/graphite/service_test.go index c5bcda7584b..9cc59fa4933 100644 --- a/services/graphite/service_test.go +++ b/services/graphite/service_test.go @@ -7,7 +7,6 @@ import ( "testing" "time" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/graphite" "github.com/influxdata/influxdb/services/meta" @@ -35,7 +34,7 @@ func Test_ServerGraphiteTCP(t *testing.T) { wg.Add(1) pointsWriter := PointsWriter{ - WritePointsFn: func(req *cluster.WritePointsRequest) error { + WritePointsFn: func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { defer wg.Done() pt, _ := models.NewPoint( @@ -44,14 +43,14 @@ func Test_ServerGraphiteTCP(t *testing.T) { map[string]interface{}{"value": 23.456}, time.Unix(now.Unix(), 0)) - if req.Database != "graphitedb" { - t.Fatalf("unexpected database: %s", req.Database) - } else if req.RetentionPolicy != "" { - t.Fatalf("unexpected retention policy: %s", req.RetentionPolicy) - } else if len(req.Points) != 1 { - t.Fatalf("expected 1 point, got %d", len(req.Points)) - } else if req.Points[0].String() != pt.String() { - t.Fatalf("expected point %v, got %v", pt.String(), req.Points[0].String()) + if database != "graphitedb" { + t.Fatalf("unexpected database: %s", database) + } else if retentionPolicy != "" { + t.Fatalf("unexpected retention policy: %s", retentionPolicy) + } else if len(points) != 
1 { + t.Fatalf("expected 1 point, got %d", len(points)) + } else if points[0].String() != pt.String() { + t.Fatalf("expected point %v, got %v", pt.String(), points[0].String()) } return nil }, @@ -111,7 +110,7 @@ func Test_ServerGraphiteUDP(t *testing.T) { wg.Add(1) pointsWriter := PointsWriter{ - WritePointsFn: func(req *cluster.WritePointsRequest) error { + WritePointsFn: func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { defer wg.Done() pt, _ := models.NewPoint( @@ -119,12 +118,12 @@ func Test_ServerGraphiteUDP(t *testing.T) { map[string]string{}, map[string]interface{}{"value": 23.456}, time.Unix(now.Unix(), 0)) - if req.Database != "graphitedb" { - t.Fatalf("unexpected database: %s", req.Database) - } else if req.RetentionPolicy != "" { - t.Fatalf("unexpected retention policy: %s", req.RetentionPolicy) - } else if req.Points[0].String() != pt.String() { - t.Fatalf("unexpected points: %#v", req.Points[0].String()) + if database != "graphitedb" { + t.Fatalf("unexpected database: %s", database) + } else if retentionPolicy != "" { + t.Fatalf("unexpected retention policy: %s", retentionPolicy) + } else if points[0].String() != pt.String() { + t.Fatalf("unexpected points: %#v", points[0].String()) } return nil }, @@ -161,11 +160,11 @@ func Test_ServerGraphiteUDP(t *testing.T) { // PointsWriter represents a mock impl of PointsWriter. type PointsWriter struct { - WritePointsFn func(*cluster.WritePointsRequest) error + WritePointsFn func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } -func (w *PointsWriter) WritePoints(p *cluster.WritePointsRequest) error { - return w.WritePointsFn(p) +func (w *PointsWriter) WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { + return w.WritePointsFn(database, retentionPolicy, consistencyLevel, points) } type DatabaseCreator struct { diff --git a/services/httpd/handler.go b/services/httpd/handler.go index 829248b3220..513a31bf383 100644 --- a/services/httpd/handler.go +++ b/services/httpd/handler.go @@ -20,7 +20,6 @@ import ( "github.com/bmizerany/pat" "github.com/influxdata/influxdb" "github.com/influxdata/influxdb/client" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/continuous_querier" @@ -70,7 +69,7 @@ type Handler struct { QueryExecutor influxql.QueryExecutor PointsWriter interface { - WritePoints(p *cluster.WritePointsRequest) error + WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } ContinuousQuerier continuous_querier.ContinuousQuerier @@ -454,11 +453,7 @@ func (h *Handler) serveWriteJSON(w http.ResponseWriter, r *http.Request, body [] } // Convert the json batch struct to a points writer struct - if err := h.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: bp.Database, - RetentionPolicy: bp.RetentionPolicy, - Points: points, - }); err != nil { + if err := h.PointsWriter.WritePoints(bp.Database, bp.RetentionPolicy, models.ConsistencyLevelAny, points); err != nil { h.statMap.Add(statPointsWrittenFail, int64(len(points))) if influxdb.IsClientError(err) { resultError(w, influxql.Result{Err: err}, http.StatusBadRequest) @@ -534,11 +529,7 @@ func (h *Handler) serveWriteLine(w http.ResponseWriter, r *http.Request, body [] } // Write points. 
- if err := h.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: database, - RetentionPolicy: r.FormValue("rp"), - Points: points, - }); influxdb.IsClientError(err) { + if err := h.PointsWriter.WritePoints(database, r.FormValue("rp"), models.ConsistencyLevelAny, points); influxdb.IsClientError(err) { h.statMap.Add(statPointsWrittenFail, int64(len(points))) resultError(w, influxql.Result{Err: err}, http.StatusBadRequest) return diff --git a/services/opentsdb/handler.go b/services/opentsdb/handler.go index 0c959d68346..c804e1a0b3b 100644 --- a/services/opentsdb/handler.go +++ b/services/opentsdb/handler.go @@ -13,7 +13,6 @@ import ( "time" "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" ) @@ -23,7 +22,7 @@ type Handler struct { RetentionPolicy string PointsWriter interface { - WritePoints(p *cluster.WritePointsRequest) error + WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } Logger *log.Logger @@ -122,11 +121,7 @@ func (h *Handler) servePut(w http.ResponseWriter, r *http.Request) { } // Write points. - if err := h.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: h.Database, - RetentionPolicy: h.RetentionPolicy, - Points: points, - }); influxdb.IsClientError(err) { + if err := h.PointsWriter.WritePoints(h.Database, h.RetentionPolicy, models.ConsistencyLevelAny, points); influxdb.IsClientError(err) { h.Logger.Println("write series error: ", err) http.Error(w, "write series error: "+err.Error(), http.StatusBadRequest) return diff --git a/services/opentsdb/service.go b/services/opentsdb/service.go index ea239a068c7..1db82b47d98 100644 --- a/services/opentsdb/service.go +++ b/services/opentsdb/service.go @@ -17,7 +17,6 @@ import ( "time" "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/tsdb" @@ -62,7 +61,7 @@ type Service struct { RetentionPolicy string PointsWriter interface { - WritePoints(p *cluster.WritePointsRequest) error + WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } MetaClient interface { CreateDatabase(name string) (*meta.DatabaseInfo, error) @@ -365,11 +364,7 @@ func (s *Service) processBatches(batcher *tsdb.PointBatcher) { for { select { case batch := <-batcher.Out(): - if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.Database, - RetentionPolicy: s.RetentionPolicy, - Points: batch, - }); err == nil { + if err := s.PointsWriter.WritePoints(s.Database, s.RetentionPolicy, models.ConsistencyLevelAny, batch); err == nil { s.statMap.Add(statBatchesTrasmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) } else { diff --git a/services/opentsdb/service_test.go b/services/opentsdb/service_test.go index 3d9d412594b..5a4240a36ac 100644 --- a/services/opentsdb/service_test.go +++ b/services/opentsdb/service_test.go @@ -12,7 +12,6 @@ import ( "time" "github.com/davecgh/go-spew/spew" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/services/opentsdb" @@ -30,14 +29,14 @@ func TestService_Telnet(t *testing.T) { // Mock points writer. 
var called int32 - s.PointsWriter.WritePointsFn = func(req *cluster.WritePointsRequest) error { + s.PointsWriter.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { atomic.StoreInt32(&called, 1) - if req.Database != "db0" { - t.Fatalf("unexpected database: %s", req.Database) - } else if req.RetentionPolicy != "" { - t.Fatalf("unexpected retention policy: %s", req.RetentionPolicy) - } else if !reflect.DeepEqual(req.Points, []models.Point{ + if database != "db0" { + t.Fatalf("unexpected database: %s", database) + } else if retentionPolicy != "" { + t.Fatalf("unexpected retention policy: %s", retentionPolicy) + } else if !reflect.DeepEqual(points, []models.Point{ models.MustNewPoint( "sys.cpu.user", map[string]string{"host": "webserver01", "cpu": "0"}, @@ -45,8 +44,7 @@ func TestService_Telnet(t *testing.T) { time.Unix(1356998400, 0), ), }) { - spew.Dump(req.Points) - t.Fatalf("unexpected points: %#v", req.Points) + t.Fatalf("unexpected points: %#v", points) } return nil } @@ -94,13 +92,13 @@ func TestService_HTTP(t *testing.T) { // Mock points writer. var called bool - s.PointsWriter.WritePointsFn = func(req *cluster.WritePointsRequest) error { + s.PointsWriter.WritePointsFn = func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { called = true - if req.Database != "db0" { - t.Fatalf("unexpected database: %s", req.Database) - } else if req.RetentionPolicy != "" { - t.Fatalf("unexpected retention policy: %s", req.RetentionPolicy) - } else if !reflect.DeepEqual(req.Points, []models.Point{ + if database != "db0" { + t.Fatalf("unexpected database: %s", database) + } else if retentionPolicy != "" { + t.Fatalf("unexpected retention policy: %s", retentionPolicy) + } else if !reflect.DeepEqual(points, []models.Point{ models.MustNewPoint( "sys.cpu.nice", map[string]string{"dc": "lga", "host": "web01"}, @@ -108,8 +106,8 @@ func TestService_HTTP(t *testing.T) { time.Unix(1346846400, 0), ), }) { - spew.Dump(req.Points) - t.Fatalf("unexpected points: %#v", req.Points) + spew.Dump(points) + t.Fatalf("unexpected points: %#v", points) } return nil } @@ -157,11 +155,11 @@ func NewService(database string) *Service { // PointsWriter represents a mock impl of PointsWriter. 
type PointsWriter struct { - WritePointsFn func(*cluster.WritePointsRequest) error + WritePointsFn func(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } -func (w *PointsWriter) WritePoints(p *cluster.WritePointsRequest) error { - return w.WritePointsFn(p) +func (w *PointsWriter) WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error { + return w.WritePointsFn(database, retentionPolicy, consistencyLevel, points) } type DatabaseCreator struct { diff --git a/services/udp/service.go b/services/udp/service.go index 25aff0bc20f..9ac24732f4e 100644 --- a/services/udp/service.go +++ b/services/udp/service.go @@ -11,7 +11,6 @@ import ( "time" "github.com/influxdata/influxdb" - "github.com/influxdata/influxdb/cluster" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/services/meta" "github.com/influxdata/influxdb/tsdb" @@ -49,7 +48,7 @@ type Service struct { config Config PointsWriter interface { - WritePoints(p *cluster.WritePointsRequest) error + WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } MetaClient interface { @@ -128,11 +127,7 @@ func (s *Service) writer() { for { select { case batch := <-s.batcher.Out(): - if err := s.PointsWriter.WritePoints(&cluster.WritePointsRequest{ - Database: s.config.Database, - RetentionPolicy: s.config.RetentionPolicy, - Points: batch, - }); err == nil { + if err := s.PointsWriter.WritePoints(s.config.Database, s.config.RetentionPolicy, models.ConsistencyLevelAny, batch); err == nil { s.statMap.Add(statBatchesTrasmitted, 1) s.statMap.Add(statPointsTransmitted, int64(len(batch))) } else { From ea37ed98e558780d5ba658623b689229d3c8dedb Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Fri, 11 Mar 2016 13:29:59 -0700 Subject: [PATCH 12/14] CHANGELOG --- CHANGELOG.md | 1 + cmd/influx_tsm/tsdb/codec.go | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1cca890c9b..89e20e697f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ - [#5593](https://github.com/influxdata/influxdb/issues/5593): Modify `SHOW TAG VALUES` output for the new query engine to normalize the output. 
- [#5862](https://github.com/influxdata/influxdb/pull/5862): Make Admin UI dynamically fetch both client and server versions - [#2715](https://github.com/influxdata/influxdb/issues/2715): Support using field regex comparisons in the WHERE clause +- [#5994](https://github.com/influxdata/influxdb/issues/5994): Single server ### Bugfixes diff --git a/cmd/influx_tsm/tsdb/codec.go b/cmd/influx_tsm/tsdb/codec.go index 060927d8f25..d7751d0aeb3 100644 --- a/cmd/influx_tsm/tsdb/codec.go +++ b/cmd/influx_tsm/tsdb/codec.go @@ -103,8 +103,6 @@ func (f *FieldCodec) DecodeByID(targetID uint8, b []byte) (interface{}, error) { return value, nil } } - - return nil, ErrFieldNotFound } // DecodeByName scans a byte slice for a field with the given name, converts it to its From 1d2c1faa9435e36fab51864795262c7d4c8edd46 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Mon, 14 Mar 2016 10:51:12 -0500 Subject: [PATCH 13/14] address PR feedback --- cmd/influxd/run/server.go | 14 +--- cmd/influxd/run/server_test.go | 33 +++++++++ services/httpd/handler.go | 15 ++++ services/httpd/handler_test.go | 14 ++++ services/meta/client.go | 130 +++++++++++++++++++++++++-------- 5 files changed, 163 insertions(+), 43 deletions(-) diff --git a/cmd/influxd/run/server.go b/cmd/influxd/run/server.go index ee0a85b39e7..dc4c378a6f1 100644 --- a/cmd/influxd/run/server.go +++ b/cmd/influxd/run/server.go @@ -2,14 +2,12 @@ package run import ( "fmt" - "io/ioutil" "log" "net" "os" "path/filepath" "runtime" "runtime/pprof" - "strings" "time" "github.com/influxdata/influxdb" @@ -118,21 +116,11 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) { } } - node, err := influxdb.LoadNode(c.Meta.Dir) + _, err := influxdb.LoadNode(c.Meta.Dir) if err != nil { if !os.IsNotExist(err) { return nil, err } - - node = influxdb.NewNode(c.Meta.Dir) - } - - // In 0.11 we removed MetaServers from node.json. To avoid confusion for - // existing users, force a re-save of the node.json file to remove that property - // if it happens to exist. - nodeContents, err := ioutil.ReadFile(filepath.Join(c.Meta.Dir, "node.json")) - if err == nil && strings.Contains(string(nodeContents), "MetaServers") { - node.Save() } // In 0.10.0 bind-address got moved to the top level. Check diff --git a/cmd/influxd/run/server_test.go b/cmd/influxd/run/server_test.go index 8d378b56b7a..c4392831b98 100644 --- a/cmd/influxd/run/server_test.go +++ b/cmd/influxd/run/server_test.go @@ -495,6 +495,39 @@ func TestServer_Write_LineProtocol_Integer(t *testing.T) { } } +// Ensure the server returns a partial write response when some points fail to parse. Also validate that +// the successfully parsed points can be queried. +func TestServer_Write_LineProtocol_Partial(t *testing.T) { + t.Parallel() + s := OpenServer(NewConfig()) + defer s.Close() + + if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { + t.Fatal(err) + } + + now := now() + points := []string{ + "cpu,host=server01 value=100 " + strconv.FormatInt(now.UnixNano(), 10), + "cpu,host=server01 value=NaN " + strconv.FormatInt(now.UnixNano(), 20), + "cpu,host=server01 value=NaN " + strconv.FormatInt(now.UnixNano(), 30), + } + if res, err := s.Write("db0", "rp0", strings.Join(points, "\n"), nil); err == nil { + t.Fatal("expected error. 
got nil", err) + } else if exp := ``; exp != res { + t.Fatalf("unexpected results\nexp: %s\ngot: %s\n", exp, res) + } else if exp := "partial write"; !strings.Contains(err.Error(), exp) { + t.Fatalf("unexpected error: exp\nexp: %v\ngot: %v", exp, err) + } + + // Verify the data was written. + if res, err := s.Query(`SELECT * FROM db0.rp0.cpu GROUP BY *`); err != nil { + t.Fatal(err) + } else if exp := fmt.Sprintf(`{"results":[{"series":[{"name":"cpu","tags":{"host":"server01"},"columns":["time","value"],"values":[["%s",100]]}]}]}`, now.Format(time.RFC3339Nano)); exp != res { + t.Fatalf("unexpected results\nexp: %s\ngot: %s\n", exp, res) + } +} + // Ensure the server can query with default databases (via param) and default retention policy func TestServer_Query_DefaultDBAndRP(t *testing.T) { t.Parallel() diff --git a/services/httpd/handler.go b/services/httpd/handler.go index 513a31bf383..98dbfe91a1c 100644 --- a/services/httpd/handler.go +++ b/services/httpd/handler.go @@ -118,6 +118,14 @@ func NewHandler(requireAuthentication, loggingEnabled, writeTrace, JSONWriteEnab "ping-head", "HEAD", "/ping", true, true, h.servePing, }, + route{ // Ping w/ status + "status", + "GET", "/status", true, true, h.serveStatus, + }, + route{ // Ping w/ status + "status-head", + "HEAD", "/status", true, true, h.serveStatus, + }, route{ // Tell data node to run CQs that should be run "process_continuous_queries", "POST", "/data/process_continuous_queries", false, false, h.serveProcessContinuousQueries, @@ -561,6 +569,13 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } +// serveStatus has been depricated +func (h *Handler) serveStatus(w http.ResponseWriter, r *http.Request) { + h.Logger.Printf("WARNING: /status has been depricated. Use /ping instead.") + h.statMap.Add(statStatusRequest, 1) + w.WriteHeader(http.StatusNoContent) +} + // convertToEpoch converts result timestamps from time.Time to the specified epoch. func convertToEpoch(r *influxql.Result, epoch string) { divisor := int64(1) diff --git a/services/httpd/handler_test.go b/services/httpd/handler_test.go index a1dafc56ff7..c1ca1fa130d 100644 --- a/services/httpd/handler_test.go +++ b/services/httpd/handler_test.go @@ -340,6 +340,20 @@ func TestHandler_Version(t *testing.T) { } } +// Ensure the handler handles status requests correctly. +func TestHandler_Status(t *testing.T) { + h := NewHandler(false) + w := httptest.NewRecorder() + h.ServeHTTP(w, MustNewRequest("GET", "/status", nil)) + if w.Code != http.StatusNoContent { + t.Fatalf("unexpected status: %d", w.Code) + } + h.ServeHTTP(w, MustNewRequest("HEAD", "/status", nil)) + if w.Code != http.StatusNoContent { + t.Fatalf("unexpected status: %d", w.Code) + } +} + // Ensure write endpoint can handle bad requests func TestHandler_HandleBadRequestBody(t *testing.T) { b := bytes.NewReader(make([]byte, 10)) diff --git a/services/meta/client.go b/services/meta/client.go index 80937bc1ad7..6dcfe88c4fb 100644 --- a/services/meta/client.go +++ b/services/meta/client.go @@ -97,7 +97,7 @@ func (c *Client) Open() error { // If this is a brand new instance, persist to disk immediatly. 
if c.cacheData.Index == 1 { - if err := c.Snapshot(); err != nil { + if err := snapshot(c.path, c.cacheData); err != nil { return err } } @@ -207,7 +207,10 @@ func (c *Client) CreateDatabase(name string) (*DatabaseInfo, error) { db := data.Database(name) - c.commit(data) + if err := c.commit(data); err != nil { + return nil, err + } + return db, nil } @@ -247,7 +250,10 @@ func (c *Client) CreateDatabaseWithRetentionPolicy(name string, rpi *RetentionPo db := data.Database(name) - c.commit(data) + if err := c.commit(data); err != nil { + return nil, err + } + return db, nil } @@ -262,7 +268,10 @@ func (c *Client) DropDatabase(name string) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -290,7 +299,10 @@ func (c *Client) CreateRetentionPolicy(database string, rpi *RetentionPolicyInfo return nil, err } - c.commit(data) + if err := c.commit(data); err != nil { + return nil, err + } + return rp, nil } @@ -319,7 +331,10 @@ func (c *Client) DropRetentionPolicy(database, name string) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -334,7 +349,10 @@ func (c *Client) SetDefaultRetentionPolicy(database, name string) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -349,7 +367,10 @@ func (c *Client) UpdateRetentionPolicy(database, name string, rpu *RetentionPoli return err } - defer c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -420,7 +441,10 @@ func (c *Client) CreateUser(name, password string, admin bool) (*UserInfo, error u := data.User(name) - c.commit(data) + if err := c.commit(data); err != nil { + return nil, err + } + return u, nil } @@ -442,7 +466,10 @@ func (c *Client) UpdateUser(name, password string) error { delete(c.authCache, name) - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -456,7 +483,10 @@ func (c *Client) DropUser(name string) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -470,7 +500,10 @@ func (c *Client) SetPrivilege(username, database string, p influxql.Privilege) e return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -484,7 +517,10 @@ func (c *Client) SetAdminPrivilege(username string, admin bool) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -651,7 +687,10 @@ func (c *Client) CreateShardGroup(database, policy string, timestamp time.Time) return nil, err } - c.commit(data) + if err := c.commit(data); err != nil { + return nil, err + } + return sgi, nil } @@ -686,7 +725,10 @@ func (c *Client) DeleteShardGroup(database, policy string, id uint64) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -722,7 +764,10 @@ func (c *Client) PrecreateShardGroups(from, to time.Time) error { } } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -763,7 +808,10 @@ func (c *Client) CreateContinuousQuery(database, name, query string) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -777,7 +825,10 @@ func (c *Client) DropContinuousQuery(database, name string) error { return nil } - defer c.commit(data) + if err := c.commit(data); err != nil { + return err + } + 
return nil } @@ -791,7 +842,10 @@ func (c *Client) CreateSubscription(database, rp, name, mode string, destination return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -805,7 +859,10 @@ func (c *Client) DropSubscription(database, rp, name string) error { return err } - c.commit(data) + if err := c.commit(data); err != nil { + return err + } + return nil } @@ -818,7 +875,10 @@ func (c *Client) SetData(data *Data) error { // increment the index to force the changed channel to fire d := data.Clone() d.Index++ - c.commit(d) + + if err := c.commit(d); err != nil { + return err + } c.mu.Unlock() @@ -834,12 +894,22 @@ func (c *Client) WaitForDataChanged() chan struct{} { } // commit assumes it is under a full lock -func (c *Client) commit(data *Data) { +func (c *Client) commit(data *Data) error { data.Index++ + + // try to write to disk before updating in memory + if err := snapshot(c.path, data); err != nil { + return err + } + + // update in memory c.cacheData = data - c.Snapshot() + + // close channels to signal changes close(c.changed) c.changed = make(chan struct{}) + + return nil } func (c *Client) MarshalBinary() ([]byte, error) { @@ -869,9 +939,9 @@ func (c *Client) updateAuthCache() { c.authCache = newCache } -// Snapshot will save the current meta data to disk -func (c *Client) Snapshot() error { - file := filepath.Join(c.path, metaFile) +// snapshot will save the current meta data to disk +func snapshot(path string, data *Data) error { + file := filepath.Join(path, metaFile) tmpFile := file + "tmp" f, err := os.Create(tmpFile) @@ -880,14 +950,14 @@ func (c *Client) Snapshot() error { } defer f.Close() - var data []byte - if b, err := c.cacheData.MarshalBinary(); err != nil { + var d []byte + if b, err := data.MarshalBinary(); err != nil { return err } else { - data = b + d = b } - if _, err := f.Write(data); err != nil { + if _, err := f.Write(d); err != nil { return err } From 207bafae8a8bf359eddadff0a133974a6ffa0896 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 14 Mar 2016 16:59:06 +0000 Subject: [PATCH 14/14] Create a Point from a models.Point --- client/v2/client.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/client/v2/client.go b/client/v2/client.go index c5fbc2d97dc..ebc910062f8 100644 --- a/client/v2/client.go +++ b/client/v2/client.go @@ -392,6 +392,11 @@ func (p *Point) Fields() map[string]interface{} { return p.pt.Fields() } +// NewPointFrom returns a point from the provided models.Point. +func NewPointFrom(pt models.Point) *Point { + return &Point{pt: pt} +} + func (uc *udpclient) Write(bp BatchPoints) error { var b bytes.Buffer var d time.Duration