diff --git a/etc/config.sample.toml b/etc/config.sample.toml index 0846a0b2919..fdf5d50d84d 100644 --- a/etc/config.sample.toml +++ b/etc/config.sample.toml @@ -37,7 +37,9 @@ reporting-disabled = false [data] dir = "/var/opt/influxdb/data" - # Controls the engine type for new shards. + # Controls the engine type for new shards. Options are b1, bz1, or tsm1. + # b1 is the 0.9.2 storage engine, bz1 is the 0.9.3 and 0.9.4 engine. + # tsm1 is the 0.9.5 engine # engine ="bz1" # The following WAL settings are for the b1 storage engine used in 0.9.2. They won't diff --git a/models/points.go b/models/points.go index 1ded7234d5b..592780389d4 100644 --- a/models/points.go +++ b/models/points.go @@ -1021,6 +1021,10 @@ func (p *point) Tags() Tags { i, key = scanTo(p.key, i, '=') i, value = scanTagValue(p.key, i+1) + if len(value) == 0 { + continue + } + tags[string(unescapeTag(key))] = string(unescapeTag(value)) i += 1 @@ -1141,7 +1145,10 @@ func (t Tags) HashKey() []byte { for k, v := range t { ek := escapeTag([]byte(k)) ev := escapeTag([]byte(v)) - escaped[string(ek)] = string(ev) + + if len(ev) > 0 { + escaped[string(ek)] = string(ev) + } } // Extract keys and determine final size. diff --git a/models/points_test.go b/models/points_test.go index 1d4d8bf866a..4186b89d970 100644 --- a/models/points_test.go +++ b/models/points_test.go @@ -605,6 +605,18 @@ func TestParsePointUnescape(t *testing.T) { }, time.Unix(0, 0))) + // tag with no value + test(t, `cpu,regions=east value="1"`, + models.NewPoint("cpu", + models.Tags{ + "regions": "east", + "foobar": "", + }, + models.Fields{ + "value": "1", + }, + time.Unix(0, 0))) + // commas in field values test(t, `cpu,regions=east value="1,0"`, models.NewPoint("cpu", diff --git a/services/copier/service_test.go b/services/copier/service_test.go index a5266087d7f..ce1151d3cf8 100644 --- a/services/copier/service_test.go +++ b/services/copier/service_test.go @@ -19,6 +19,7 @@ import ( // Ensure the service can return shard data. 
func TestService_handleConn(t *testing.T) { + t.Skip("not implemented for tsm1 engine") s := MustOpenService() defer s.Close() diff --git a/tsdb/config.go b/tsdb/config.go index 9843541e296..52d182c1172 100644 --- a/tsdb/config.go +++ b/tsdb/config.go @@ -42,7 +42,15 @@ const ( // we'll need to create backpressure, otherwise we'll fill up the memory and die. // This number multiplied by the parition count is roughly the max possible memory // size for the in-memory WAL cache. - DefaultPartitionSizeThreshold = 20 * 1024 * 1024 // 20MB + DefaultPartitionSizeThreshold = 50 * 1024 * 1024 // 50MB + + // Default WAL settings for the TSM1 WAL + DefaultFlushMemorySizeThreshold = 5 * 1024 * 1024 // 5MB + DefaultMaxMemorySizeThreshold = 100 * 1024 * 1024 // 100MB + DefaultIndexCompactionAge = time.Minute + DefaultIndexMinCompactionInterval = time.Minute + DefaultIndexMinCompactionFileCount = 5 + DefaultIndexCompactionFullAge = 5 * time.Minute ) type Config struct { @@ -63,6 +71,28 @@ type Config struct { WALFlushColdInterval toml.Duration `toml:"wal-flush-cold-interval"` WALPartitionSizeThreshold uint64 `toml:"wal-partition-size-threshold"` + // WAL configuration options for tsm1 introduced in 0.9.5 + WALFlushMemorySizeThreshold int `toml:"wal-flush-memory-size-threshold"` + WALMaxMemorySizeThreshold int `toml:"wal-max-memory-size-threshold"` + + // compaction options for tsm1 introduced in 0.9.5 + + // IndexCompactionAge specifies the duration after the data file creation time + // at which it is eligible to be compacted + IndexCompactionAge time.Duration `toml:"index-compaction-age"` + + // IndexMinimumCompactionInterval specifies the minimum amount of time that must + // pass after a compaction before another compaction is run + IndexMinCompactionInterval time.Duration `toml:"index-min-compaction-interval"` + + // IndexCompactionFileCount specifies the minimum number of data files that + // must be eligible for compaction before actually running one + 
IndexMinCompactionFileCount int `toml:"index-compaction-min-file-count"` + + // IndexCompactionFullAge specifies how long after the last write was received + // in the WAL that a full compaction should be performed. + IndexCompactionFullAge time.Duration `toml:"index-compaction-full-age"` + // Query logging QueryLogEnabled bool `toml:"query-log-enabled"` } @@ -74,12 +104,18 @@ func NewConfig() Config { WALFlushInterval: toml.Duration(DefaultWALFlushInterval), WALPartitionFlushDelay: toml.Duration(DefaultWALPartitionFlushDelay), - WALLoggingEnabled: true, - WALReadySeriesSize: DefaultReadySeriesSize, - WALCompactionThreshold: DefaultCompactionThreshold, - WALMaxSeriesSize: DefaultMaxSeriesSize, - WALFlushColdInterval: toml.Duration(DefaultFlushColdInterval), - WALPartitionSizeThreshold: DefaultPartitionSizeThreshold, + WALLoggingEnabled: true, + WALReadySeriesSize: DefaultReadySeriesSize, + WALCompactionThreshold: DefaultCompactionThreshold, + WALMaxSeriesSize: DefaultMaxSeriesSize, + WALFlushColdInterval: toml.Duration(DefaultFlushColdInterval), + WALPartitionSizeThreshold: DefaultPartitionSizeThreshold, + WALFlushMemorySizeThreshold: DefaultFlushMemorySizeThreshold, + WALMaxMemorySizeThreshold: DefaultMaxMemorySizeThreshold, + IndexCompactionAge: DefaultIndexCompactionAge, + IndexMinCompactionFileCount: DefaultIndexMinCompactionFileCount, + IndexCompactionFullAge: DefaultIndexCompactionFullAge, + IndexMinCompactionInterval: DefaultIndexMinCompactionInterval, QueryLogEnabled: true, } diff --git a/tsdb/engine.go b/tsdb/engine.go index c8d5946139f..fb1b2108c5c 100644 --- a/tsdb/engine.go +++ b/tsdb/engine.go @@ -24,7 +24,7 @@ type Engine interface { Close() error SetLogOutput(io.Writer) - LoadMetadataIndex(index *DatabaseIndex, measurementFields map[string]*MeasurementFields) error + LoadMetadataIndex(shard *Shard, index *DatabaseIndex, measurementFields map[string]*MeasurementFields) error Begin(writable bool) (Tx, error) WritePoints(points []models.Point, 
measurementFieldsToSave map[string]*MeasurementFields, seriesToCreate []*SeriesCreate) error @@ -32,9 +32,23 @@ type Engine interface { DeleteMeasurement(name string, seriesKeys []string) error SeriesCount() (n int, err error) + // PerformMaintenance will get called periodically by the store + PerformMaintenance() + + // Format will return the format for the engine + Format() EngineFormat + io.WriterTo } +type EngineFormat int + +const ( + B1Format EngineFormat = iota + BZ1Format + TSM1Format +) + // NewEngineFunc creates a new engine. type NewEngineFunc func(path string, walPath string, options EngineOptions) Engine @@ -57,9 +71,24 @@ func NewEngine(path string, walPath string, options EngineOptions) (Engine, erro return newEngineFuncs[options.EngineVersion](path, walPath, options), nil } - // Only bolt-based backends are currently supported so open it and check the format. + // Only bolt and tsm1 based storage engines are currently supported var format string if err := func() error { + // if it's a dir then it's a tsm1 engine + f, err := os.Open(path) + if err != nil { + return err + } + fi, err := f.Stat() + f.Close() + if err != nil { + return err + } + if fi.Mode().IsDir() { + format = "tsm1" + return nil + } + db, err := bolt.Open(path, 0666, &bolt.Options{Timeout: 1 * time.Second}) if err != nil { return err diff --git a/tsdb/engine/b1/b1.go b/tsdb/engine/b1/b1.go index fdc337b2f7a..a3f63602cdc 100644 --- a/tsdb/engine/b1/b1.go +++ b/tsdb/engine/b1/b1.go @@ -91,6 +91,14 @@ func NewEngine(path string, walPath string, opt tsdb.EngineOptions) tsdb.Engine // Path returns the path the engine was initialized with. func (e *Engine) Path() string { return e.path } +// PerformMaintenance is for periodic maintenance of the store. A no-op for b1 +func (e *Engine) PerformMaintenance() {} + +// Format returns the format type of this engine +func (e *Engine) Format() tsdb.EngineFormat { + return tsdb.B1Format +} + // Open opens and initializes the engine. 
func (e *Engine) Open() error { if err := func() error { @@ -174,7 +182,7 @@ func (e *Engine) close() error { func (e *Engine) SetLogOutput(w io.Writer) { e.LogOutput = w } // LoadMetadataIndex loads the shard metadata into memory. -func (e *Engine) LoadMetadataIndex(index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { +func (e *Engine) LoadMetadataIndex(shard *tsdb.Shard, index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { return e.db.View(func(tx *bolt.Tx) error { // load measurement metadata meta := tx.Bucket([]byte("fields")) diff --git a/tsdb/engine/b1/b1_test.go b/tsdb/engine/b1/b1_test.go index 5c3c19ee3bc..31b90344c36 100644 --- a/tsdb/engine/b1/b1_test.go +++ b/tsdb/engine/b1/b1_test.go @@ -21,7 +21,7 @@ func TestEngine_WritePoints(t *testing.T) { // Create metadata. mf := &tsdb.MeasurementFields{Fields: make(map[string]*tsdb.Field)} - mf.CreateFieldIfNotExists("value", influxql.Float) + mf.CreateFieldIfNotExists("value", influxql.Float, true) seriesToCreate := []*tsdb.SeriesCreate{ {Series: tsdb.NewSeries(string(models.MakeKey([]byte("temperature"), nil)), nil)}, } @@ -84,7 +84,7 @@ func TestEngine_WritePoints_Reverse(t *testing.T) { // Create metadata. mf := &tsdb.MeasurementFields{Fields: make(map[string]*tsdb.Field)} - mf.CreateFieldIfNotExists("value", influxql.Float) + mf.CreateFieldIfNotExists("value", influxql.Float, true) seriesToCreate := []*tsdb.SeriesCreate{ {Series: tsdb.NewSeries(string(models.MakeKey([]byte("temperature"), nil)), nil)}, } diff --git a/tsdb/engine/bz1/bz1.go b/tsdb/engine/bz1/bz1.go index e4d5682ca47..881b82dc431 100644 --- a/tsdb/engine/bz1/bz1.go +++ b/tsdb/engine/bz1/bz1.go @@ -114,6 +114,14 @@ func NewEngine(path string, walPath string, opt tsdb.EngineOptions) tsdb.Engine // Path returns the path the engine was opened with. func (e *Engine) Path() string { return e.path } +// PerformMaintenance is for periodic maintenance of the store. 
A no-op for bz1 +func (e *Engine) PerformMaintenance() {} + +// Format returns the format type of this engine +func (e *Engine) Format() tsdb.EngineFormat { + return tsdb.BZ1Format +} + // Open opens and initializes the engine. func (e *Engine) Open() error { if err := func() error { @@ -176,7 +184,7 @@ func (e *Engine) close() error { func (e *Engine) SetLogOutput(w io.Writer) {} // LoadMetadataIndex loads the shard metadata into memory. -func (e *Engine) LoadMetadataIndex(index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { +func (e *Engine) LoadMetadataIndex(shard *tsdb.Shard, index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { if err := e.db.View(func(tx *bolt.Tx) error { // Load measurement metadata fields, err := e.readFields(tx) diff --git a/tsdb/engine/bz1/bz1_test.go b/tsdb/engine/bz1/bz1_test.go index 97873afe3b2..0b0cb1e60cc 100644 --- a/tsdb/engine/bz1/bz1_test.go +++ b/tsdb/engine/bz1/bz1_test.go @@ -38,7 +38,7 @@ func TestEngine_LoadMetadataIndex_Series(t *testing.T) { // Load metadata index. index := tsdb.NewDatabaseIndex() - if err := e.LoadMetadataIndex(index, make(map[string]*tsdb.MeasurementFields)); err != nil { + if err := e.LoadMetadataIndex(nil, index, make(map[string]*tsdb.MeasurementFields)); err != nil { t.Fatal(err) } @@ -80,7 +80,7 @@ func TestEngine_LoadMetadataIndex_Fields(t *testing.T) { // Load metadata index. 
mfs := make(map[string]*tsdb.MeasurementFields) - if err := e.LoadMetadataIndex(tsdb.NewDatabaseIndex(), mfs); err != nil { + if err := e.LoadMetadataIndex(nil, tsdb.NewDatabaseIndex(), mfs); err != nil { t.Fatal(err) } diff --git a/tsdb/engine/engine.go b/tsdb/engine/engine.go index c5565ff06cf..6c8cb51e193 100644 --- a/tsdb/engine/engine.go +++ b/tsdb/engine/engine.go @@ -3,4 +3,5 @@ package engine import ( _ "github.com/influxdb/influxdb/tsdb/engine/b1" _ "github.com/influxdb/influxdb/tsdb/engine/bz1" + _ "github.com/influxdb/influxdb/tsdb/engine/tsm1" ) diff --git a/tsdb/engine/tsm1/bool.go b/tsdb/engine/tsm1/bool.go new file mode 100644 index 00000000000..83f570a2ef2 --- /dev/null +++ b/tsdb/engine/tsm1/bool.go @@ -0,0 +1,135 @@ +package tsm1 + +// bool encoding uses 1 bit per value. Each compressed byte slice contains a 1 byte header +// indicating the compression type, followed by a variable byte encoded length indicating +// how many booleans are packed in the slice. The remaining bytes contains 1 byte for every +// 8 boolean values encoded. 
+ +import "encoding/binary" + +const ( + // boolUncompressed is an uncompressed boolean format + boolUncompressed = 0 + // boolCompressedBitPacked is an bit packed format using 1 bit per boolean + boolCompressedBitPacked = 1 +) + +type BoolEncoder interface { + Write(b bool) + Bytes() ([]byte, error) +} + +type BoolDecoder interface { + Next() bool + Read() bool + Error() error +} + +type boolEncoder struct { + // The encoded bytes + bytes []byte + + // The current byte being encoded + b byte + + // The number of bools packed into b + i int + + // The total number of bools written + n int +} + +func NewBoolEncoder() BoolEncoder { + return &boolEncoder{} +} + +func (e *boolEncoder) Write(b bool) { + // If we have filled the current byte, flush it + if e.i >= 8 { + e.flush() + } + + // Use 1 bit for each boolen value, shift the current byte + // by 1 and set the least signficant bit acordingly + e.b = e.b << 1 + if b { + e.b |= 1 + } + + // Increment the current bool count + e.i += 1 + // Increment the total bool count + e.n += 1 +} + +func (e *boolEncoder) flush() { + // Pad remaining byte w/ 0s + for e.i < 8 { + e.b = e.b << 1 + e.i += 1 + } + + // If we have bits set, append them to the byte slice + if e.i > 0 { + e.bytes = append(e.bytes, e.b) + e.b = 0 + e.i = 0 + } +} + +func (e *boolEncoder) Bytes() ([]byte, error) { + // Ensure the current byte is flushed + e.flush() + b := make([]byte, 10+1) + + // Store the encoding type in the 4 high bits of the first byte + b[0] = byte(boolCompressedBitPacked) << 4 + + i := 1 + // Encode the number of bools written + i += binary.PutUvarint(b[i:], uint64(e.n)) + + // Append the packed booleans + return append(b[:i], e.bytes...), nil +} + +type boolDecoder struct { + b []byte + i int + n int + err error +} + +func NewBoolDecoder(b []byte) BoolDecoder { + // First byte stores the encoding type, only have 1 bit-packet format + // currently ignore for now. 
+ b = b[1:] + count, n := binary.Uvarint(b) + return &boolDecoder{b: b[n:], i: -1, n: int(count)} +} + +func (e *boolDecoder) Next() bool { + e.i += 1 + return e.i < e.n +} + +func (e *boolDecoder) Read() bool { + // Index into the byte slice + idx := e.i / 8 + + // Bit position + pos := (8 - e.i%8) - 1 + + // The mask to select the bit + mask := byte(1 << uint(pos)) + + // The packed byte + v := e.b[idx] + + // Returns true if the bit is set + return v&mask == mask +} + +func (e *boolDecoder) Error() error { + return e.err +} diff --git a/tsdb/engine/tsm1/bool_test.go b/tsdb/engine/tsm1/bool_test.go new file mode 100644 index 00000000000..ed68987afd1 --- /dev/null +++ b/tsdb/engine/tsm1/bool_test.go @@ -0,0 +1,73 @@ +package tsm1_test + +import ( + "testing" + + "github.com/influxdb/influxdb/tsdb/engine/tsm1" +) + +func Test_BoolEncoder_NoValues(t *testing.T) { + enc := tsm1.NewBoolEncoder() + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewBoolDecoder(b) + if dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } +} + +func Test_BoolEncoder_Single(t *testing.T) { + enc := tsm1.NewBoolEncoder() + v1 := true + enc.Write(v1) + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewBoolDecoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got false, exp true") + } + + if v1 != dec.Read() { + t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), v1) + } +} + +func Test_BoolEncoder_Multi_Compressed(t *testing.T) { + enc := tsm1.NewBoolEncoder() + + values := make([]bool, 10) + for i := range values { + values[i] = i%2 == 0 + enc.Write(values[i]) + } + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if exp := 4; len(b) != exp { + t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) + } + + dec := tsm1.NewBoolDecoder(b) + + for i, v := range values { + if !dec.Next() { + 
t.Fatalf("unexpected next value: got false, exp true") + } + if v != dec.Read() { + t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v) + } + } + + if dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } +} diff --git a/tsdb/engine/tsm1/cursor.go b/tsdb/engine/tsm1/cursor.go new file mode 100644 index 00000000000..0cee157d3a3 --- /dev/null +++ b/tsdb/engine/tsm1/cursor.go @@ -0,0 +1,481 @@ +package tsm1 + +import ( + "math" + + "github.com/influxdb/influxdb/tsdb" +) + +// combinedEngineCursor holds a cursor for the WAL and the index +// and will combine the two together. Any points in the WAL with +// identical timestamps from the index will be preferred over the +// index point +type combinedEngineCursor struct { + walCursor tsdb.Cursor + engineCursor tsdb.Cursor + walKeyBuf int64 + walValueBuf interface{} + engineKeyBuf int64 + engineValueBuf interface{} + ascending bool +} + +func NewCombinedEngineCursor(wc, ec tsdb.Cursor, ascending bool) tsdb.Cursor { + return &combinedEngineCursor{ + walCursor: wc, + engineCursor: ec, + ascending: ascending, + } +} + +// SeekTo will seek both the index and WAL cursor +func (c *combinedEngineCursor) SeekTo(seek int64) (key int64, value interface{}) { + c.walKeyBuf, c.walValueBuf = c.walCursor.SeekTo(seek) + c.engineKeyBuf, c.engineValueBuf = c.engineCursor.SeekTo(seek) + return c.read() +} + +// Next returns the next value in the cursor +func (c *combinedEngineCursor) Next() (int64, interface{}) { + return c.read() +} + +// Ascending returns true if the cursor is time ascending +func (c *combinedEngineCursor) Ascending() bool { + return c.ascending +} + +// read will return the buffer value that is next from either the +// WAL or index cursor and repopulate the buffer value with the +// appropriate cursor's next value +func (c *combinedEngineCursor) read() (key int64, value interface{}) { + if c.walKeyBuf == tsdb.EOF && c.engineKeyBuf == tsdb.EOF { + return tsdb.EOF, nil + } + + // 
handle the case where they have the same point + if c.walKeyBuf == c.engineKeyBuf { + // keep the wal value since it will overwrite the engine value + key = c.walKeyBuf + value = c.walValueBuf + c.walKeyBuf, c.walValueBuf = c.walCursor.Next() + + // overwrite the buffered engine values + c.engineKeyBuf, c.engineValueBuf = c.engineCursor.Next() + return + } + + // ascending order + if c.ascending { + if c.walKeyBuf != tsdb.EOF && (c.walKeyBuf < c.engineKeyBuf || c.engineKeyBuf == tsdb.EOF) { + key = c.walKeyBuf + value = c.walValueBuf + c.walKeyBuf, c.walValueBuf = c.walCursor.Next() + return + } + + key = c.engineKeyBuf + value = c.engineValueBuf + c.engineKeyBuf, c.engineValueBuf = c.engineCursor.Next() + return + } + + // descending order + if c.walKeyBuf != tsdb.EOF && c.walKeyBuf > c.engineKeyBuf { + key = c.walKeyBuf + value = c.walValueBuf + c.walKeyBuf, c.walValueBuf = c.walCursor.Next() + return + } + + key = c.engineKeyBuf + value = c.engineValueBuf + c.engineKeyBuf, c.engineValueBuf = c.engineCursor.Next() + return +} + +// multieFieldCursor wraps cursors for multiple fields on the same series +// key. 
Instead of returning a plain interface value in the call for Next(), +// it returns a map[string]interface{} for the field values +type multiFieldCursor struct { + fields []string + cursors []tsdb.Cursor + ascending bool + keyBuffer []int64 + valueBuffer []interface{} +} + +func NewMultiFieldCursor(fields []string, cursors []tsdb.Cursor, ascending bool) tsdb.Cursor { + return &multiFieldCursor{ + fields: fields, + cursors: cursors, + ascending: ascending, + keyBuffer: make([]int64, len(cursors)), + valueBuffer: make([]interface{}, len(cursors)), + } +} + +func (m *multiFieldCursor) SeekTo(seek int64) (key int64, value interface{}) { + for i, c := range m.cursors { + m.keyBuffer[i], m.valueBuffer[i] = c.SeekTo(seek) + } + return m.read() +} + +func (m *multiFieldCursor) Next() (int64, interface{}) { + return m.read() +} + +func (m *multiFieldCursor) Ascending() bool { + return m.ascending +} + +func (m *multiFieldCursor) read() (int64, interface{}) { + t := int64(math.MaxInt64) + if !m.ascending { + t = int64(math.MinInt64) + } + + // find the time we need to combine all fields + for _, k := range m.keyBuffer { + if k == tsdb.EOF { + continue + } + if m.ascending && t > k { + t = k + } else if !m.ascending && t < k { + t = k + } + } + + // get the value and advance each of the cursors that have the matching time + if t == math.MinInt64 || t == math.MaxInt64 { + return tsdb.EOF, nil + } + + mm := make(map[string]interface{}) + for i, k := range m.keyBuffer { + if k == t { + mm[m.fields[i]] = m.valueBuffer[i] + m.keyBuffer[i], m.valueBuffer[i] = m.cursors[i].Next() + } + } + return t, mm +} + +type emptyCursor struct { + ascending bool +} + +func (c *emptyCursor) Next() (int64, interface{}) { return tsdb.EOF, nil } +func (c *emptyCursor) SeekTo(key int64) (int64, interface{}) { return tsdb.EOF, nil } +func (c *emptyCursor) Ascending() bool { return c.ascending } + +// cursor is a cursor for the data in the index +type cursor struct { + // id for the series key and 
field + id uint64 + + // f is the current data file we're reading from + f *dataFile + + // filesPos is the position in the files index we're reading from + filesPos int // the index in the files slice we're looking at + + // pos is the position in the current data file we're reading + pos uint32 + + // vals is the current decoded block of Values we're iterating from + vals Values + + ascending bool + + // blockPositions is used for descending queries to keep track + // of what positions in the current data file encoded blocks for + // the id exist at + blockPositions []uint32 + + // time acending slice of read only data files + files []*dataFile +} + +func newCursor(id uint64, files []*dataFile, ascending bool) *cursor { + return &cursor{ + id: id, + ascending: ascending, + files: files, + } +} + +func (c *cursor) SeekTo(seek int64) (int64, interface{}) { + if len(c.files) == 0 { + return tsdb.EOF, nil + } + + if c.ascending { + if seek <= c.files[0].MinTime() { + c.filesPos = 0 + c.f = c.files[0] + } else { + for i, f := range c.files { + if seek >= f.MinTime() && seek <= f.MaxTime() { + c.filesPos = i + c.f = f + break + } + } + } + } else { + if seek >= c.files[len(c.files)-1].MaxTime() { + c.filesPos = len(c.files) - 1 + c.f = c.files[c.filesPos] + } else if seek < c.files[0].MinTime() { + return tsdb.EOF, nil + } else { + for i, f := range c.files { + if seek >= f.MinTime() && seek <= f.MaxTime() { + c.filesPos = i + c.f = f + break + } + } + } + } + + if c.f == nil { + return tsdb.EOF, nil + } + + // find the first file we need to check in + for { + if c.filesPos < 0 || c.filesPos >= len(c.files) { + return tsdb.EOF, nil + } + c.f = c.files[c.filesPos] + + c.pos = c.f.StartingPositionForID(c.id) + + // if this id isn't in this file, move to next one or return + if c.pos == 0 { + if c.ascending { + c.filesPos++ + } else { + c.filesPos-- + c.blockPositions = nil + } + continue + } + + // handle seek for correct order + k := tsdb.EOF + var v interface{} + + if 
c.ascending { + k, v = c.seekAscending(seek) + } else { + k, v = c.seekDescending(seek) + } + + if k != tsdb.EOF { + return k, v + } + + if c.ascending { + c.filesPos++ + } else { + c.filesPos-- + c.blockPositions = nil + } + } +} + +func (c *cursor) seekAscending(seek int64) (int64, interface{}) { + // seek to the block and values we're looking for + for { + // if the time is between this block and the next, + // decode this block and go, otherwise seek to next block + length := c.blockLength(c.pos) + + // if the next block has a time less than what we're seeking to, + // skip decoding this block and continue on + nextBlockPos := c.pos + blockHeaderSize + length + if nextBlockPos < c.f.indexPosition() { + nextBlockID := btou64(c.f.mmap[nextBlockPos : nextBlockPos+8]) + if nextBlockID == c.id { + nextBlockTime := c.blockMinTime(nextBlockPos) + if nextBlockTime <= seek { + c.pos = nextBlockPos + continue + } + } + } + + // it must be in this block or not at all + id := btou64((c.f.mmap[c.pos : c.pos+8])) + if id != c.id { + return tsdb.EOF, nil + } + c.decodeBlock(c.pos) + + // see if we can find it in this block + for i, v := range c.vals { + if v.Time().UnixNano() >= seek { + c.vals = c.vals[i+1:] + return v.Time().UnixNano(), v.Value() + } + } + } +} + +func (c *cursor) seekDescending(seek int64) (int64, interface{}) { + c.setBlockPositions() + if len(c.blockPositions) == 0 { + return tsdb.EOF, nil + } + + for i := len(c.blockPositions) - 1; i >= 0; i-- { + pos := c.blockPositions[i] + if c.blockMinTime(pos) > seek { + continue + } + + c.decodeBlock(pos) + c.blockPositions = c.blockPositions[:i] + + for i := len(c.vals) - 1; i >= 0; i-- { + val := c.vals[i] + if seek >= val.UnixNano() { + c.vals = c.vals[:i] + return val.UnixNano(), val.Value() + } + if seek < val.UnixNano() { + // we need to move to the next block + if i == 0 { + break + } + val := c.vals[i-1] + c.vals = c.vals[:i-1] + return val.UnixNano(), val.Value() + } + } + c.blockPositions = 
c.blockPositions[:i] + } + + return tsdb.EOF, nil +} + +// blockMinTime is the minimum time for the block +func (c *cursor) blockMinTime(pos uint32) int64 { + return int64(btou64(c.f.mmap[pos+12 : pos+20])) +} + +// setBlockPositions will read the positions of all +// blocks for the cursor id in the given data file +func (c *cursor) setBlockPositions() { + pos := c.pos + + for { + if pos >= c.f.indexPosition() { + return + } + + length := c.blockLength(pos) + id := btou64(c.f.mmap[pos : pos+8]) + + if id != c.id { + return + } + + c.blockPositions = append(c.blockPositions, pos) + pos += blockHeaderSize + length + } +} + +func (c *cursor) Next() (int64, interface{}) { + if c.ascending { + k, v := c.nextAscending() + return k, v + } + return c.nextDescending() +} + +func (c *cursor) nextAscending() (int64, interface{}) { + if len(c.vals) > 0 { + v := c.vals[0] + c.vals = c.vals[1:] + + return v.Time().UnixNano(), v.Value() + } + + // if we have a file set, see if the next block is for this ID + if c.f != nil && c.pos < c.f.indexPosition() { + nextBlockID := btou64(c.f.mmap[c.pos : c.pos+8]) + if nextBlockID == c.id { + c.decodeBlock(c.pos) + return c.nextAscending() + } + } + + // loop through the files until we hit the next one that has this id + for { + c.filesPos++ + if c.filesPos >= len(c.files) { + return tsdb.EOF, nil + } + c.f = c.files[c.filesPos] + + startingPos := c.f.StartingPositionForID(c.id) + if startingPos == 0 { + // move to next file because it isn't in this one + continue + } + + // we have a block with this id, decode and return + c.decodeBlock(startingPos) + return c.nextAscending() + } +} + +func (c *cursor) nextDescending() (int64, interface{}) { + if len(c.vals) > 0 { + v := c.vals[len(c.vals)-1] + if len(c.vals) >= 1 { + c.vals = c.vals[:len(c.vals)-1] + } else { + c.vals = nil + } + return v.UnixNano(), v.Value() + } + + for i := len(c.blockPositions) - 1; i >= 0; i-- { + c.decodeBlock(c.blockPositions[i]) + c.blockPositions = 
c.blockPositions[:i] + if len(c.vals) == 0 { + continue + } + val := c.vals[len(c.vals)-1] + c.vals = c.vals[:len(c.vals)-1] + return val.UnixNano(), val.Value() + } + + return tsdb.EOF, nil +} + +func (c *cursor) blockLength(pos uint32) uint32 { + return btou32(c.f.mmap[pos+8 : pos+12]) +} + +// decodeBlock will decod the block and set the vals +func (c *cursor) decodeBlock(position uint32) { + length := c.blockLength(position) + block := c.f.mmap[position+blockHeaderSize : position+blockHeaderSize+length] + c.vals, _ = DecodeBlock(block) + + // only adavance the position if we're asceending. + // Descending queries use the blockPositions + if c.ascending { + c.pos = position + blockHeaderSize + length + } +} + +func (c *cursor) Ascending() bool { return c.ascending } diff --git a/tsdb/engine/tsm1/encoding.go b/tsdb/engine/tsm1/encoding.go new file mode 100644 index 00000000000..3de88586320 --- /dev/null +++ b/tsdb/engine/tsm1/encoding.go @@ -0,0 +1,554 @@ +package tsm1 + +import ( + "encoding/binary" + "fmt" + "sort" + "time" + + "github.com/influxdb/influxdb/tsdb" +) + +const ( + // BlockFloat64 designates a block encodes float64 values + BlockFloat64 = 0 + + // BlockInt64 designates a block encodes int64 values + BlockInt64 = 1 + + // BlockBool designates a block encodes bool values + BlockBool = 2 + + // BlockString designates a block encodes string values + BlockString = 3 + + // encodedBlockHeaderSize is the size of the header for an encoded block. The first 8 bytes + // are the minimum timestamp of the block. The next byte is a block encoding type indicator. 
+ encodedBlockHeaderSize = 9 +) + +type Value interface { + Time() time.Time + UnixNano() int64 + Value() interface{} + Size() int +} + +func NewValue(t time.Time, value interface{}) Value { + switch v := value.(type) { + case int64: + return &Int64Value{time: t, value: v} + case float64: + return &FloatValue{time: t, value: v} + case bool: + return &BoolValue{time: t, value: v} + case string: + return &StringValue{time: t, value: v} + } + return &EmptyValue{} +} + +type EmptyValue struct { +} + +func (e *EmptyValue) UnixNano() int64 { return tsdb.EOF } +func (e *EmptyValue) Time() time.Time { return time.Unix(0, tsdb.EOF) } +func (e *EmptyValue) Value() interface{} { return nil } +func (e *EmptyValue) Size() int { return 0 } + +// Values represented a time ascending sorted collection of Value types. +// the underlying type should be the same across all values, but the interface +// makes the code cleaner. +type Values []Value + +func (v Values) MinTime() int64 { + return v[0].Time().UnixNano() +} + +func (v Values) MaxTime() int64 { + return v[len(v)-1].Time().UnixNano() +} + +func (v Values) Encode(buf []byte) ([]byte, error) { + switch v[0].(type) { + case *FloatValue: + return encodeFloatBlock(buf, v) + case *Int64Value: + return encodeInt64Block(buf, v) + case *BoolValue: + return encodeBoolBlock(buf, v) + case *StringValue: + return encodeStringBlock(buf, v) + } + + return nil, fmt.Errorf("unsupported value type %T", v[0]) +} + +func (v Values) DecodeSameTypeBlock(block []byte) Values { + switch v[0].(type) { + case *FloatValue: + a, _ := decodeFloatBlock(block) + return a + case *Int64Value: + a, _ := decodeInt64Block(block) + return a + case *BoolValue: + a, _ := decodeBoolBlock(block) + return a + case *StringValue: + a, _ := decodeStringBlock(block) + return a + } + return nil +} + +// DecodeBlock takes a byte array and will decode into values of the appropriate type +// based on the block +func DecodeBlock(block []byte) (Values, error) { + if len(block) 
<= encodedBlockHeaderSize { + panic(fmt.Sprintf("decode of short block: got %v, exp %v", len(block), encodedBlockHeaderSize)) + } + + blockType := block[8] + switch blockType { + case BlockFloat64: + return decodeFloatBlock(block) + case BlockInt64: + return decodeInt64Block(block) + case BlockBool: + return decodeBoolBlock(block) + case BlockString: + return decodeStringBlock(block) + default: + panic(fmt.Sprintf("unknown block type: %d", blockType)) + } +} + +// Deduplicate returns a new Values slice with any values +// that have the same timestamp removed. The Value that appears +// last in the slice is the one that is kept. The returned slice is in ascending order +func (v Values) Deduplicate() Values { + m := make(map[int64]Value) + for _, val := range v { + m[val.UnixNano()] = val + } + + a := make([]Value, 0, len(m)) + for _, val := range m { + a = append(a, val) + } + sort.Sort(Values(a)) + + return a +} + +// Sort methods +func (a Values) Len() int { return len(a) } +func (a Values) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a Values) Less(i, j int) bool { return a[i].Time().UnixNano() < a[j].Time().UnixNano() } + +type FloatValue struct { + time time.Time + value float64 +} + +func (f *FloatValue) Time() time.Time { + return f.time +} + +func (f *FloatValue) UnixNano() int64 { + return f.time.UnixNano() +} + +func (f *FloatValue) Value() interface{} { + return f.value +} + +func (f *FloatValue) Size() int { + return 16 +} + +func encodeFloatBlock(buf []byte, values []Value) ([]byte, error) { + if len(values) == 0 { + return nil, nil + } + + // A float block is encoded using different compression strategies + // for timestamps and values. + + // Encode values using Gorilla float compression + venc := NewFloatEncoder() + + // Encode timestamps using an adaptive encoder that uses delta-encoding, + // frame-or-reference and run length encoding. 
+ tsenc := NewTimeEncoder() + + for _, v := range values { + tsenc.Write(v.Time()) + venc.Push(v.(*FloatValue).value) + } + venc.Finish() + + // Encoded timestamp values + tb, err := tsenc.Bytes() + if err != nil { + return nil, err + } + // Encoded float values + vb := venc.Bytes() + + // Prepend the first timestamp of the block in the first 8 bytes and the block + // in the next byte, followed by the block + block := packBlockHeader(values[0].Time(), BlockFloat64) + block = append(block, packBlock(tb, vb)...) + return block, nil +} + +func decodeFloatBlock(block []byte) ([]Value, error) { + // The first 8 bytes is the minimum timestamp of the block + block = block[8:] + + // Block type is the next block, make sure we actually have a float block + blockType := block[0] + if blockType != BlockFloat64 { + return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockFloat64, blockType) + } + block = block[1:] + + tb, vb := unpackBlock(block) + + // Setup our timestamp and value decoders + dec := NewTimeDecoder(tb) + iter, err := NewFloatDecoder(vb) + if err != nil { + return nil, err + } + + // Decode both a timestamp and value + var a []Value + for dec.Next() && iter.Next() { + ts := dec.Read() + v := iter.Values() + a = append(a, &FloatValue{ts, v}) + } + + // Did timestamp decoding have an error? + if dec.Error() != nil { + return nil, dec.Error() + } + // Did float decoding have an error? 
+ if iter.Error() != nil { + return nil, iter.Error() + } + + return a, nil +} + +type BoolValue struct { + time time.Time + value bool +} + +func (b *BoolValue) Time() time.Time { + return b.time +} + +func (b *BoolValue) Size() int { + return 9 +} + +func (b *BoolValue) UnixNano() int64 { + return b.time.UnixNano() +} + +func (b *BoolValue) Value() interface{} { + return b.value +} + +func encodeBoolBlock(buf []byte, values []Value) ([]byte, error) { + if len(values) == 0 { + return nil, nil + } + + // A bool block is encoded using different compression strategies + // for timestamps and values. + + // Encode values using Gorilla float compression + venc := NewBoolEncoder() + + // Encode timestamps using an adaptive encoder + tsenc := NewTimeEncoder() + + for _, v := range values { + tsenc.Write(v.Time()) + venc.Write(v.(*BoolValue).value) + } + + // Encoded timestamp values + tb, err := tsenc.Bytes() + if err != nil { + return nil, err + } + // Encoded float values + vb, err := venc.Bytes() + if err != nil { + return nil, err + } + + // Prepend the first timestamp of the block in the first 8 bytes and the block + // in the next byte, followed by the block + block := packBlockHeader(values[0].Time(), BlockBool) + block = append(block, packBlock(tb, vb)...) 
+ return block, nil +} + +func decodeBoolBlock(block []byte) ([]Value, error) { + // The first 8 bytes is the minimum timestamp of the block + block = block[8:] + + // Block type is the next block, make sure we actually have a float block + blockType := block[0] + if blockType != BlockBool { + return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockBool, blockType) + } + block = block[1:] + + tb, vb := unpackBlock(block) + + // Setup our timestamp and value decoders + dec := NewTimeDecoder(tb) + vdec := NewBoolDecoder(vb) + + // Decode both a timestamp and value + var a []Value + for dec.Next() && vdec.Next() { + ts := dec.Read() + v := vdec.Read() + a = append(a, &BoolValue{ts, v}) + } + + // Did timestamp decoding have an error? + if dec.Error() != nil { + return nil, dec.Error() + } + // Did bool decoding have an error? + if vdec.Error() != nil { + return nil, vdec.Error() + } + + return a, nil +} + +type Int64Value struct { + time time.Time + value int64 +} + +func (v *Int64Value) Time() time.Time { + return v.time +} + +func (v *Int64Value) Value() interface{} { + return v.value +} + +func (f *Int64Value) UnixNano() int64 { + return f.time.UnixNano() +} + +func (v *Int64Value) Size() int { + return 16 +} + +func (v *Int64Value) String() string { return fmt.Sprintf("%v", v.value) } + +func encodeInt64Block(buf []byte, values []Value) ([]byte, error) { + tsEnc := NewTimeEncoder() + vEnc := NewInt64Encoder() + for _, v := range values { + tsEnc.Write(v.Time()) + vEnc.Write(v.(*Int64Value).value) + } + + // Encoded timestamp values + tb, err := tsEnc.Bytes() + if err != nil { + return nil, err + } + // Encoded int64 values + vb, err := vEnc.Bytes() + if err != nil { + return nil, err + } + + // Prepend the first timestamp of the block in the first 8 bytes + block := packBlockHeader(values[0].Time(), BlockInt64) + return append(block, packBlock(tb, vb)...), nil +} + +func decodeInt64Block(block []byte) ([]Value, error) { + // slice off the first 8 bytes 
(min timestmap for the block) + block = block[8:] + + blockType := block[0] + if blockType != BlockInt64 { + return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockInt64, blockType) + } + + block = block[1:] + + // The first 8 bytes is the minimum timestamp of the block + tb, vb := unpackBlock(block) + + // Setup our timestamp and value decoders + tsDec := NewTimeDecoder(tb) + vDec := NewInt64Decoder(vb) + + // Decode both a timestamp and value + var a []Value + for tsDec.Next() && vDec.Next() { + ts := tsDec.Read() + v := vDec.Read() + a = append(a, &Int64Value{ts, v}) + } + + // Did timestamp decoding have an error? + if tsDec.Error() != nil { + return nil, tsDec.Error() + } + // Did int64 decoding have an error? + if vDec.Error() != nil { + return nil, vDec.Error() + } + + return a, nil +} + +type StringValue struct { + time time.Time + value string +} + +func (v *StringValue) Time() time.Time { + return v.time +} + +func (v *StringValue) Value() interface{} { + return v.value +} + +func (v *StringValue) UnixNano() int64 { + return v.time.UnixNano() +} + +func (v *StringValue) Size() int { + return 8 + len(v.value) +} + +func (v *StringValue) String() string { return v.value } + +func encodeStringBlock(buf []byte, values []Value) ([]byte, error) { + tsEnc := NewTimeEncoder() + vEnc := NewStringEncoder() + for _, v := range values { + tsEnc.Write(v.Time()) + vEnc.Write(v.(*StringValue).value) + } + + // Encoded timestamp values + tb, err := tsEnc.Bytes() + if err != nil { + return nil, err + } + // Encoded string values + vb, err := vEnc.Bytes() + if err != nil { + return nil, err + } + + // Prepend the first timestamp of the block in the first 8 bytes + block := packBlockHeader(values[0].Time(), BlockString) + return append(block, packBlock(tb, vb)...), nil +} + +func decodeStringBlock(block []byte) ([]Value, error) { + // slice off the first 8 bytes (min timestmap for the block) + block = block[8:] + + blockType := block[0] + if blockType != 
BlockString { + return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockString, blockType) + } + + block = block[1:] + + // The first 8 bytes is the minimum timestamp of the block + tb, vb := unpackBlock(block) + + // Setup our timestamp and value decoders + tsDec := NewTimeDecoder(tb) + vDec, err := NewStringDecoder(vb) + if err != nil { + return nil, err + } + + // Decode both a timestamp and value + var a []Value + for tsDec.Next() && vDec.Next() { + ts := tsDec.Read() + v := vDec.Read() + a = append(a, &StringValue{ts, v}) + } + + // Did timestamp decoding have an error? + if tsDec.Error() != nil { + return nil, tsDec.Error() + } + // Did string decoding have an error? + if vDec.Error() != nil { + return nil, vDec.Error() + } + + return a, nil +} + +func packBlockHeader(firstTime time.Time, blockType byte) []byte { + return append(u64tob(uint64(firstTime.UnixNano())), blockType) +} + +func packBlock(ts []byte, values []byte) []byte { + // We encode the length of the timestamp block using a variable byte encoding. + // This allows small byte slices to take up 1 byte while larger ones use 2 or more. + b := make([]byte, 10) + i := binary.PutUvarint(b, uint64(len(ts))) + + // block is , , + block := append(b[:i], ts...) + + // We don't encode the value length because we know it's the rest of the block after + // the timestamp block. + return append(block, values...) +} + +func unpackBlock(buf []byte) (ts, values []byte) { + // Unpack the timestamp block length + tsLen, i := binary.Uvarint(buf) + + // Unpack the timestamp bytes + ts = buf[int(i) : int(i)+int(tsLen)] + + // Unpack the value bytes + values = buf[int(i)+int(tsLen):] + return +} + +// ZigZagEncode converts a int64 to a uint64 by zig zagging negative and positive values +// across even and odd numbers. Eg. 
[0,-1,1,-2] becomes [0, 1, 2, 3] +func ZigZagEncode(x int64) uint64 { + return uint64(uint64(x<<1) ^ uint64((int64(x) >> 63))) +} + +// ZigZagDecode converts a previously zigzag encoded uint64 back to a int64 +func ZigZagDecode(v uint64) int64 { + return int64((v >> 1) ^ uint64((int64(v&1)<<63)>>63)) +} diff --git a/tsdb/engine/tsm1/encoding_test.go b/tsdb/engine/tsm1/encoding_test.go new file mode 100644 index 00000000000..309b947eb60 --- /dev/null +++ b/tsdb/engine/tsm1/encoding_test.go @@ -0,0 +1,158 @@ +package tsm1_test + +import ( + // "math/rand" + + "fmt" + "reflect" + "testing" + "time" + + "github.com/influxdb/influxdb/tsdb/engine/tsm1" +) + +func TestEncoding_FloatBlock(t *testing.T) { + valueCount := 1000 + times := getTimes(valueCount, 60, time.Second) + values := make(tsm1.Values, len(times)) + for i, t := range times { + values[i] = tsm1.NewValue(t, float64(i)) + } + + b, err := values.Encode(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + decodedValues := values.DecodeSameTypeBlock(b) + + if !reflect.DeepEqual(decodedValues, values) { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) + } +} + +func TestEncoding_FloatBlock_ZeroTime(t *testing.T) { + values := make(tsm1.Values, 3) + for i := 0; i < 3; i++ { + values[i] = tsm1.NewValue(time.Unix(0, 0), float64(i)) + } + + b, err := values.Encode(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + decodedValues := values.DecodeSameTypeBlock(b) + + if !reflect.DeepEqual(decodedValues, values) { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) + } +} + +func TestEncoding_IntBlock_Basic(t *testing.T) { + valueCount := 1000 + times := getTimes(valueCount, 60, time.Second) + values := make(tsm1.Values, len(times)) + for i, t := range times { + values[i] = tsm1.NewValue(t, int64(i)) + } + + b, err := values.Encode(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + decodedValues := 
values.DecodeSameTypeBlock(b) + + if len(decodedValues) != len(values) { + t.Fatalf("unexpected results length:\n\tgot: %v\n\texp: %v\n", len(decodedValues), len(values)) + } + + for i := 0; i < len(decodedValues); i++ { + + if decodedValues[i].Time() != values[i].Time() { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues[i].Time(), values[i].Time()) + } + + if decodedValues[i].Value() != values[i].Value() { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues[i].Value(), values[i].Value()) + } + } +} + +func TestEncoding_IntBlock_Negatives(t *testing.T) { + valueCount := 1000 + times := getTimes(valueCount, 60, time.Second) + values := make(tsm1.Values, len(times)) + for i, t := range times { + v := int64(i) + if i%2 == 0 { + v = -v + } + values[i] = tsm1.NewValue(t, int64(v)) + } + + b, err := values.Encode(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + decodedValues := values.DecodeSameTypeBlock(b) + + if !reflect.DeepEqual(decodedValues, values) { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) + } +} + +func TestEncoding_BoolBlock_Basic(t *testing.T) { + valueCount := 1000 + times := getTimes(valueCount, 60, time.Second) + values := make(tsm1.Values, len(times)) + for i, t := range times { + v := true + if i%2 == 0 { + v = false + } + values[i] = tsm1.NewValue(t, v) + } + + b, err := values.Encode(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + decodedValues := values.DecodeSameTypeBlock(b) + + if !reflect.DeepEqual(decodedValues, values) { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) + } +} + +func TestEncoding_StringBlock_Basic(t *testing.T) { + valueCount := 1000 + times := getTimes(valueCount, 60, time.Second) + values := make(tsm1.Values, len(times)) + for i, t := range times { + values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) + } + + b, err := values.Encode(nil) + if err != nil { + 
t.Fatalf("unexpected error: %v", err) + } + + decodedValues := values.DecodeSameTypeBlock(b) + + if !reflect.DeepEqual(decodedValues, values) { + t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) + } +} + +func getTimes(n, step int, precision time.Duration) []time.Time { + t := time.Now().Round(precision) + a := make([]time.Time, n) + for i := 0; i < n; i++ { + a[i] = t.Add(time.Duration(i*60) * precision) + } + return a +} diff --git a/tsdb/engine/tsm1/float.go b/tsdb/engine/tsm1/float.go new file mode 100644 index 00000000000..8961c70f4ff --- /dev/null +++ b/tsdb/engine/tsm1/float.go @@ -0,0 +1,210 @@ +package tsm1 + +/* +This code is originally from: https://github.com/dgryski/go-tsz and has been modified to remove +the timestamp compression fuctionality. + +It implements the float compression as presented in: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf. +This implementation uses a sentinel value of NaN which means that float64 NaN cannot be stored using +this version. +*/ + +import ( + "bytes" + "math" + + "github.com/dgryski/go-bits" + "github.com/dgryski/go-bitstream" +) + +const ( + // floatUncompressed is an uncompressed format using 8 bytes per value + floatUncompressed = 0 + // floatCompressedGorilla is a compressed format using the gorilla paper encoding + floatCompressedGorilla = 1 +) + +// FloatEncoder encodes multiple float64s into a byte slice +type FloatEncoder struct { + val float64 + + leading uint64 + trailing uint64 + + buf bytes.Buffer + bw *bitstream.BitWriter + + first bool + finished bool +} + +func NewFloatEncoder() *FloatEncoder { + s := FloatEncoder{ + first: true, + leading: ^uint64(0), + } + + s.bw = bitstream.NewWriter(&s.buf) + + return &s + +} + +func (s *FloatEncoder) Bytes() []byte { + return append([]byte{floatCompressedGorilla << 4}, s.buf.Bytes()...) 
+} + +func (s *FloatEncoder) Finish() { + if !s.finished { + // // write an end-of-stream record + s.Push(math.NaN()) + s.bw.Flush(bitstream.Zero) + s.finished = true + } +} + +func (s *FloatEncoder) Push(v float64) { + if s.first { + // first point + s.val = v + s.first = false + s.bw.WriteBits(math.Float64bits(v), 64) + return + } + + vDelta := math.Float64bits(v) ^ math.Float64bits(s.val) + + if vDelta == 0 { + s.bw.WriteBit(bitstream.Zero) + } else { + s.bw.WriteBit(bitstream.One) + + leading := bits.Clz(vDelta) + trailing := bits.Ctz(vDelta) + + // TODO(dgryski): check if it's 'cheaper' to reset the leading/trailing bits instead + if s.leading != ^uint64(0) && leading >= s.leading && trailing >= s.trailing { + s.bw.WriteBit(bitstream.Zero) + s.bw.WriteBits(vDelta>>s.trailing, 64-int(s.leading)-int(s.trailing)) + } else { + s.leading, s.trailing = leading, trailing + + s.bw.WriteBit(bitstream.One) + s.bw.WriteBits(leading, 5) + + sigbits := 64 - leading - trailing + s.bw.WriteBits(sigbits, 6) + s.bw.WriteBits(vDelta>>trailing, int(sigbits)) + } + } + + s.val = v +} + +// FloatDecoder decodes a byte slice into multipe float64 values +type FloatDecoder struct { + val float64 + + leading uint64 + trailing uint64 + + br *bitstream.BitReader + + b []byte + + first bool + finished bool + + err error +} + +func NewFloatDecoder(b []byte) (*FloatDecoder, error) { + // first byte is the compression type but we currently just have gorilla + // compression + br := bitstream.NewReader(bytes.NewReader(b[1:])) + + v, err := br.ReadBits(64) + if err != nil { + return nil, err + } + + return &FloatDecoder{ + val: math.Float64frombits(v), + first: true, + br: br, + b: b, + }, nil +} + +func (it *FloatDecoder) Next() bool { + if it.err != nil || it.finished { + return false + } + + if it.first { + it.first = false + return true + } + + // read compressed value + bit, err := it.br.ReadBit() + if err != nil { + it.err = err + return false + } + + if bit == bitstream.Zero { + // 
it.val = it.val + } else { + bit, err := it.br.ReadBit() + if err != nil { + it.err = err + return false + } + if bit == bitstream.Zero { + // reuse leading/trailing zero bits + // it.leading, it.trailing = it.leading, it.trailing + } else { + bits, err := it.br.ReadBits(5) + if err != nil { + it.err = err + return false + } + it.leading = bits + + bits, err = it.br.ReadBits(6) + if err != nil { + it.err = err + return false + } + mbits := bits + it.trailing = 64 - it.leading - mbits + } + + mbits := int(64 - it.leading - it.trailing) + bits, err := it.br.ReadBits(mbits) + if err != nil { + it.err = err + return false + } + vbits := math.Float64bits(it.val) + vbits ^= (bits << it.trailing) + + val := math.Float64frombits(vbits) + if math.IsNaN(val) { + it.finished = true + return false + } + it.val = val + } + + return true +} + +func (it *FloatDecoder) Values() float64 { + return it.val +} + +func (it *FloatDecoder) Error() error { + return it.err +} diff --git a/tsdb/engine/tsm1/float_test.go b/tsdb/engine/tsm1/float_test.go new file mode 100644 index 00000000000..794d62e5b7b --- /dev/null +++ b/tsdb/engine/tsm1/float_test.go @@ -0,0 +1,165 @@ +package tsm1_test + +import ( + "testing" + + "github.com/influxdb/influxdb/tsdb/engine/tsm1" +) + +func TestFloatEncoder_Simple(t *testing.T) { + + // Example from the paper + s := tsm1.NewFloatEncoder() + + s.Push(12) + s.Push(12) + s.Push(24) + + // extra tests + + // floating point masking/shifting bug + s.Push(13) + s.Push(24) + + // delta-of-delta sizes + s.Push(24) + s.Push(24) + s.Push(24) + + s.Finish() + + b := s.Bytes() + + it, err := tsm1.NewFloatDecoder(b) + if err != nil { + t.Fatalf("unexpected error creating float decoder: %v", err) + } + + want := []float64{ + 12, + 12, + 24, + + 13, + 24, + + 24, + 24, + 24, + } + + for _, w := range want { + if !it.Next() { + t.Fatalf("Next()=false, want true") + } + vv := it.Values() + if w != vv { + t.Errorf("Values()=(%v), want (%v)\n", vv, w) + } + } + + if it.Next() 
{ + t.Fatalf("Next()=true, want false") + } + + if err := it.Error(); err != nil { + t.Errorf("it.Error()=%v, want nil", err) + } +} + +var TwoHoursData = []struct { + v float64 +}{ + // 2h of data + {761}, {727}, {763}, {706}, {700}, + {679}, {757}, {708}, {739}, {707}, + {699}, {740}, {729}, {766}, {730}, + {715}, {705}, {693}, {765}, {724}, + {799}, {761}, {737}, {766}, {756}, + {719}, {722}, {801}, {747}, {731}, + {742}, {744}, {791}, {750}, {759}, + {809}, {751}, {705}, {770}, {792}, + {727}, {762}, {772}, {721}, {748}, + {753}, {744}, {716}, {776}, {659}, + {789}, {766}, {758}, {690}, {795}, + {770}, {758}, {723}, {767}, {765}, + {693}, {706}, {681}, {727}, {724}, + {780}, {678}, {696}, {758}, {740}, + {735}, {700}, {742}, {747}, {752}, + {734}, {743}, {732}, {746}, {770}, + {780}, {710}, {731}, {712}, {712}, + {741}, {770}, {770}, {754}, {718}, + {670}, {775}, {749}, {795}, {756}, + {741}, {787}, {721}, {745}, {782}, + {765}, {780}, {811}, {790}, {836}, + {743}, {858}, {739}, {762}, {770}, + {752}, {763}, {795}, {792}, {746}, + {786}, {785}, {774}, {786}, {718}, +} + +func TestFloatEncoder_Roundtrip(t *testing.T) { + + s := tsm1.NewFloatEncoder() + for _, p := range TwoHoursData { + s.Push(p.v) + } + s.Finish() + + b := s.Bytes() + + it, err := tsm1.NewFloatDecoder(b) + if err != nil { + t.Fatalf("unexpected error creating float decoder: %v", err) + } + + for _, w := range TwoHoursData { + if !it.Next() { + t.Fatalf("Next()=false, want true") + } + vv := it.Values() + // t.Logf("it.Values()=(%+v, %+v)\n", time.Unix(int64(tt), 0), vv) + if w.v != vv { + t.Errorf("Values()=(%v), want (%v)\n", vv, w.v) + } + } + + if it.Next() { + t.Fatalf("Next()=true, want false") + } + + if err := it.Error(); err != nil { + t.Errorf("it.Error()=%v, want nil", err) + } +} + +func BenchmarkFloatEncoder(b *testing.B) { + for i := 0; i < b.N; i++ { + s := tsm1.NewFloatEncoder() + for _, tt := range TwoHoursData { + s.Push(tt.v) + } + s.Finish() + } +} + +func 
BenchmarkFloatDecoder(b *testing.B) {
+	s := tsm1.NewFloatEncoder()
+	for _, tt := range TwoHoursData {
+		s.Push(tt.v)
+	}
+	s.Finish()
+	bytes := s.Bytes()
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		it, err := tsm1.NewFloatDecoder(bytes)
+		if err != nil {
+			b.Fatalf("unexpected error creating float decoder: %v", err)
+		}
+
+		for j := 0; j < len(TwoHoursData); it.Next() {
+			j++
+		}
+	}
+}
diff --git a/tsdb/engine/tsm1/int.go b/tsdb/engine/tsm1/int.go
new file mode 100644
index 00000000000..9ce18fe96e3
--- /dev/null
+++ b/tsdb/engine/tsm1/int.go
@@ -0,0 +1,180 @@
+package tsm1
+
+// Int64 encoding uses two different strategies depending on the range of values in
+// the uncompressed data. Encoded values are first encoded using zig zag encoding.
+// This interleaves positive and negative integers across a range of positive integers.
+//
+// For example, [-2,-1,0,1] becomes [3,1,0,2]. See
+// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers
+// for more information.
+//
+// If all the zig zag encoded values are less than 1 << 60 - 1, they are compressed using
+// simple8b encoding. If any value is larger than 1 << 60 - 1, the values are stored uncompressed.
+//
+// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers
+// or 8 byte uncompressed integers. The 4 high bits of the first byte indicate the encoding type
+// for the remaining bytes.
+//
+// There are currently two encoding types that can be used with room for 16 total. These additional
+// encoding slots are reserved for future use. One improvement to be made is to use a patched
+// encoding such as PFOR if only a small number of values exceed the max compressed value range. This
+// should improve compression ratios with very large integers near the ends of the int64 range.
+ +import ( + "encoding/binary" + "fmt" + + "github.com/jwilder/encoding/simple8b" +) + +const ( + // intUncompressed is an uncompressed format using 8 bytes per point + intUncompressed = 0 + // intCompressedSimple is a bit-packed format using simple8b encoding + intCompressedSimple = 1 +) + +// Int64Encoder encoders int64 into byte slices +type Int64Encoder interface { + Write(v int64) + Bytes() ([]byte, error) +} + +// Int64Decoder decodes a byte slice into int64s +type Int64Decoder interface { + Next() bool + Read() int64 + Error() error +} + +type int64Encoder struct { + values []uint64 +} + +func NewInt64Encoder() Int64Encoder { + return &int64Encoder{} +} + +func (e *int64Encoder) Write(v int64) { + e.values = append(e.values, ZigZagEncode(v)) +} + +func (e *int64Encoder) Bytes() ([]byte, error) { + for _, v := range e.values { + // Value is too large to encode using packed format + if v > simple8b.MaxValue { + return e.encodeUncompressed() + } + } + + return e.encodePacked() +} + +func (e *int64Encoder) encodePacked() ([]byte, error) { + encoded, err := simple8b.EncodeAll(e.values) + if err != nil { + return nil, err + } + + b := make([]byte, 1+len(encoded)*8) + // 4 high bits of first byte store the encoding type for the block + b[0] = byte(intCompressedSimple) << 4 + + for i, v := range encoded { + binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v) + } + return b, nil +} + +func (e *int64Encoder) encodeUncompressed() ([]byte, error) { + b := make([]byte, 1+len(e.values)*8) + // 4 high bits of first byte store the encoding type for the block + b[0] = byte(intUncompressed) << 4 + + for i, v := range e.values { + binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v) + } + return b, nil +} + +type int64Decoder struct { + values []uint64 + bytes []byte + i int + n int + + encoding byte + err error +} + +func NewInt64Decoder(b []byte) Int64Decoder { + d := &int64Decoder{ + // 240 is the maximum number of values that can be encoded into a single uint64 using simple8b + 
values: make([]uint64, 240), + } + + d.SetBytes(b) + return d +} + +func (d *int64Decoder) SetBytes(b []byte) { + if len(b) > 0 { + d.encoding = b[0] >> 4 + d.bytes = b[1:] + } + d.i = 0 + d.n = 0 +} + +func (d *int64Decoder) Next() bool { + if d.i >= d.n && len(d.bytes) == 0 { + return false + } + + d.i += 1 + + if d.i >= d.n { + switch d.encoding { + case intUncompressed: + d.decodeUncompressed() + case intCompressedSimple: + d.decodePacked() + default: + d.err = fmt.Errorf("unknown encoding %v", d.encoding) + } + } + return d.i < d.n +} + +func (d *int64Decoder) Error() error { + return d.err +} + +func (d *int64Decoder) Read() int64 { + return ZigZagDecode(d.values[d.i]) +} + +func (d *int64Decoder) decodePacked() { + if len(d.bytes) == 0 { + return + } + + v := binary.BigEndian.Uint64(d.bytes[0:8]) + n, err := simple8b.Decode(d.values, v) + if err != nil { + // Should never happen, only error that could be returned is if the the value to be decoded was not + // actually encoded by simple8b encoder. 
+ d.err = fmt.Errorf("failed to decode value %v: %v", v, err) + } + + d.n = n + d.i = 0 + d.bytes = d.bytes[8:] +} + +func (d *int64Decoder) decodeUncompressed() { + d.values[0] = binary.BigEndian.Uint64(d.bytes[0:8]) + d.i = 0 + d.n = 1 + d.bytes = d.bytes[8:] +} diff --git a/tsdb/engine/tsm1/int_test.go b/tsdb/engine/tsm1/int_test.go new file mode 100644 index 00000000000..279b55e49bf --- /dev/null +++ b/tsdb/engine/tsm1/int_test.go @@ -0,0 +1,249 @@ +package tsm1_test + +import ( + "math" + "testing" + + "github.com/influxdb/influxdb/tsdb/engine/tsm1" +) + +func Test_Int64Encoder_NoValues(t *testing.T) { + enc := tsm1.NewInt64Encoder() + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewInt64Decoder(b) + if dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } +} + +func Test_Int64Encoder_One(t *testing.T) { + enc := tsm1.NewInt64Encoder() + v1 := int64(1) + + enc.Write(1) + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewInt64Decoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) + } +} + +func Test_Int64Encoder_Two(t *testing.T) { + enc := tsm1.NewInt64Encoder() + var v1, v2 int64 = 1, 2 + + enc.Write(v1) + enc.Write(v2) + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewInt64Decoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) + } +} + +func Test_Int64Encoder_Negative(t *testing.T) { + enc := tsm1.NewInt64Encoder() + var v1, v2, v3 int64 = -2, 0, 1 + 
+ enc.Write(v1) + enc.Write(v2) + enc.Write(v3) + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewInt64Decoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v3 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3) + } +} + +func Test_Int64Encoder_Large_Range(t *testing.T) { + enc := tsm1.NewInt64Encoder() + var v1, v2 int64 = math.MinInt64, math.MaxInt64 + enc.Write(v1) + enc.Write(v2) + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewInt64Decoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) + } +} + +func Test_Int64Encoder_Uncompressed(t *testing.T) { + enc := tsm1.NewInt64Encoder() + var v1, v2, v3 int64 = 0, 1, 1 << 60 + + enc.Write(v1) + enc.Write(v2) + enc.Write(v3) + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("expected error: %v", err) + } + + // 1 byte header + 3 * 8 byte values + if exp := 25; len(b) != exp { + t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) + } + + dec := tsm1.NewInt64Decoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) + } + + if !dec.Next() { + 
t.Fatalf("unexpected next value: got true, exp false") + } + + if v2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if v3 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3) + } +} + +func Test_Int64Encoder_AllNegative(t *testing.T) { + enc := tsm1.NewInt64Encoder() + values := []int64{ + -10, -5, -1, + } + + for _, v := range values { + enc.Write(v) + } + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := tsm1.NewInt64Decoder(b) + i := 0 + for dec.Next() { + if i > len(values) { + t.Fatalf("read too many values: got %v, exp %v", i, len(values)) + } + + if values[i] != dec.Read() { + t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) + } + i += 1 + } +} + +func BenchmarkInt64Encoder(b *testing.B) { + enc := tsm1.NewInt64Encoder() + x := make([]int64, 1024) + for i := 0; i < len(x); i++ { + x[i] = int64(i) + enc.Write(x[i]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + enc.Bytes() + } +} + +type byteSetter interface { + SetBytes(b []byte) +} + +func BenchmarkInt64Decoder(b *testing.B) { + x := make([]int64, 1024) + enc := tsm1.NewInt64Encoder() + for i := 0; i < len(x); i++ { + x[i] = int64(i) + enc.Write(x[i]) + } + bytes, _ := enc.Bytes() + + b.ResetTimer() + + dec := tsm1.NewInt64Decoder(bytes) + + for i := 0; i < b.N; i++ { + dec.(byteSetter).SetBytes(bytes) + for dec.Next() { + } + } +} diff --git a/tsdb/engine/tsm1/string.go b/tsdb/engine/tsm1/string.go new file mode 100644 index 00000000000..da06bc53599 --- /dev/null +++ b/tsdb/engine/tsm1/string.go @@ -0,0 +1,94 @@ +package tsm1 + +// String encoding uses snappy compression to compress each string. Each string is +// appended to byte slice prefixed with a variable byte length followed by the string +// bytes. 
The bytes are compressed using snappy compressor and a 1 byte header is used
+// to indicate the type of encoding.
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/golang/snappy"
+)
+
+const (
+	// stringUncompressed is an uncompressed format encoding strings as raw bytes
+	stringUncompressed = 0
+	// stringCompressedSnappy is a compressed encoding using Snappy compression
+	stringCompressedSnappy = 1
+)
+
+type StringEncoder interface {
+	Write(s string)
+	Bytes() ([]byte, error)
+}
+
+type StringDecoder interface {
+	Next() bool
+	Read() string
+	Error() error
+}
+
+type stringEncoder struct {
+	// The encoded bytes
+	bytes []byte
+}
+
+func NewStringEncoder() StringEncoder {
+	return &stringEncoder{}
+}
+
+func (e *stringEncoder) Write(s string) {
+	b := make([]byte, 10)
+	// Append the length of the string using variable byte encoding
+	i := binary.PutUvarint(b, uint64(len(s)))
+	e.bytes = append(e.bytes, b[:i]...)
+
+	// Append the string bytes
+	e.bytes = append(e.bytes, s...)
+}
+
+func (e *stringEncoder) Bytes() ([]byte, error) {
+	// Compress the currently appended bytes using snappy and prefix with
+	// a 1 byte header for future extension
+	data := snappy.Encode(nil, e.bytes)
+	return append([]byte{stringCompressedSnappy << 4}, data...), nil
+}
+
+type stringDecoder struct {
+	b   []byte
+	l   int
+	i   int
+	err error
+}
+
+func NewStringDecoder(b []byte) (StringDecoder, error) {
+	// First byte stores the encoding type, only have snappy format
+	// currently so ignore for now.
+ data, err := snappy.Decode(nil, b[1:]) + if err != nil { + return nil, fmt.Errorf("failed to decode string block: %v", err.Error()) + } + + return &stringDecoder{b: data}, nil +} + +func (e *stringDecoder) Next() bool { + e.i += e.l + return e.i < len(e.b) +} + +func (e *stringDecoder) Read() string { + // Read the length of the string + length, n := binary.Uvarint(e.b[e.i:]) + + // The length of this string plus the length of the variable byte encoded length + e.l = int(length) + n + + return string(e.b[e.i+n : e.i+n+int(length)]) +} + +func (e *stringDecoder) Error() error { + return e.err +} diff --git a/tsdb/engine/tsm1/string_test.go b/tsdb/engine/tsm1/string_test.go new file mode 100644 index 00000000000..f5143514ecc --- /dev/null +++ b/tsdb/engine/tsm1/string_test.go @@ -0,0 +1,85 @@ +package tsm1 + +import ( + "fmt" + "testing" +) + +func Test_StringEncoder_NoValues(t *testing.T) { + enc := NewStringEncoder() + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec, err := NewStringDecoder(b) + if err != nil { + t.Fatalf("unexpected erorr creating string decoder: %v", err) + } + if dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } +} + +func Test_StringEncoder_Single(t *testing.T) { + enc := NewStringEncoder() + v1 := "v1" + enc.Write(v1) + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec, err := NewStringDecoder(b) + if err != nil { + t.Fatalf("unexpected erorr creating string decoder: %v", err) + } + if !dec.Next() { + t.Fatalf("unexpected next value: got false, exp true") + } + + if v1 != dec.Read() { + t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), v1) + } +} + +func Test_StringEncoder_Multi_Compressed(t *testing.T) { + enc := NewStringEncoder() + + values := make([]string, 10) + for i := range values { + values[i] = fmt.Sprintf("value %d", i) + enc.Write(values[i]) + } + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected 
error: %v", err) + } + + if b[0]>>4 != stringCompressedSnappy { + t.Fatalf("unexpected encoding: got %v, exp %v", b[0], stringCompressedSnappy) + } + + if exp := 47; len(b) != exp { + t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) + } + + dec, err := NewStringDecoder(b) + if err != nil { + t.Fatalf("unexpected erorr creating string decoder: %v", err) + } + + for i, v := range values { + if !dec.Next() { + t.Fatalf("unexpected next value: got false, exp true") + } + if v != dec.Read() { + t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v) + } + } + + if dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } +} diff --git a/tsdb/engine/tsm1/timestamp.go b/tsdb/engine/tsm1/timestamp.go new file mode 100644 index 00000000000..ad7ed644196 --- /dev/null +++ b/tsdb/engine/tsm1/timestamp.go @@ -0,0 +1,309 @@ +package tsm1 + +// Timestamp encoding is adaptive and based on structure of the timestamps that are encoded. It +// uses a combination of delta encoding, scaling and compression using simple8b, run length encoding +// as well as falling back to no compression if needed. +// +// Timestamp values to be encoded should be sorted before encoding. When encoded, the values are +// first delta-encoded. The first value is the starting timestamp, subsequent values are the difference. +// from the prior value. +// +// Timestamp resolution can also be in the nanosecond. Many timestamps are monotonically increasing +// and fall on even boundaries of time such as every 10s. When the timestamps have this structure, +// they are scaled by the largest common divisor that is also a factor of 10. This has the effect +// of converting very large integer deltas into very small one that can be reversed by multiplying them +// by the scaling factor. +// +// Using these adjusted values, if all the deltas are the same, the time range is stored using run +// length encoding. 
If run length encoding is not possible and all values are less than 1 << 60 - 1 +// (~36.5 yrs in nanosecond resolution), then the timestamps are encoded using simple8b encoding. If +// any value exceeds the maximum values, the deltas are stored uncompressed using 8b each. +// +// Each compressed byte slice has a 1 byte header indicating the compression type. The 4 high bits +// indicated the encoding type. The 4 low bits are used by the encoding type. +// +// For run-length encoding, the 4 low bits store the log10 of the scaling factor. The next 8 bytes are +// the starting timestamp, next 1-10 bytes is the delta value using variable-length encoding, finally the +// next 1-10 bytes is the count of values. +// +// For simple8b encoding, the 4 low bits store the log10 of the scaling factor. The next 8 bytes is the +// first delta value stored uncompressed, the remaining bytes are 64bit words containg compressed delta +// values. +// +// For uncompressed encoding, the delta values are stored using 8 bytes each. + +import ( + "encoding/binary" + "fmt" + "math" + "time" + + "github.com/jwilder/encoding/simple8b" +) + +const ( + // timeUncompressed is a an uncompressed format using 8 bytes per timestamp + timeUncompressed = 0 + // timeCompressedPackedSimple is a bit-packed format using simple8b encoding + timeCompressedPackedSimple = 1 + // timeCompressedRLE is a run-length encoding format + timeCompressedRLE = 2 +) + +// TimeEncoder encodes time.Time to byte slices. +type TimeEncoder interface { + Write(t time.Time) + Bytes() ([]byte, error) +} + +// TimeEncoder decodes byte slices to time.Time values. +type TimeDecoder interface { + Next() bool + Read() time.Time + Error() error +} + +type encoder struct { + ts []uint64 +} + +// NewTimeEncoder returns a TimeEncoder +func NewTimeEncoder() TimeEncoder { + return &encoder{} +} + +// Write adds a time.Time to the compressed stream. 
+func (e *encoder) Write(t time.Time) { + e.ts = append(e.ts, uint64(t.UnixNano())) +} + +func (e *encoder) reduce() (max, divisor uint64, rle bool, deltas []uint64) { + // Compute the deltas in place to avoid allocating another slice + deltas = e.ts + // Starting values for a max and divisor + max, divisor = 0, 1e12 + + // Indicates whether the the deltas can be run-length encoded + rle = true + + // Iterate in reverse so we can apply deltas in place + for i := len(deltas) - 1; i > 0; i-- { + + // First differential encode the values + deltas[i] = deltas[i] - deltas[i-1] + + // We also need to keep track of the max value and largest common divisor + v := deltas[i] + + if v > max { + max = v + } + + for { + // If our value is divisible by 10, break. Otherwise, try the next smallest divisor. + if v%divisor == 0 { + break + } + divisor /= 10 + } + + // Skip the first value || see if prev = curr. The deltas can be RLE if the are all equal. + rle = i == len(deltas)-1 || rle && (deltas[i+1] == deltas[i]) + } + return +} + +// Bytes returns the encoded bytes of all written times. +func (e *encoder) Bytes() ([]byte, error) { + if len(e.ts) == 0 { + return []byte{}, nil + } + + // Maximum and largest common divisor. rle is true if dts (the delta timestamps), + // are all the same. 
+ max, div, rle, dts := e.reduce() + + // The deltas are all the same, so we can run-length encode them + if rle && len(e.ts) > 60 { + return e.encodeRLE(e.ts[0], e.ts[1], div, len(e.ts)) + } + + // We can't compress this time-range, the deltas exceed 1 << 60 + if max > simple8b.MaxValue { + return e.encodeRaw() + } + + return e.encodePacked(div, dts) +} + +func (e *encoder) encodePacked(div uint64, dts []uint64) ([]byte, error) { + enc := simple8b.NewEncoder() + for _, v := range dts[1:] { + enc.Write(uint64(v) / div) + } + + b := make([]byte, 8+1) + + // 4 high bits used for the encoding type + b[0] = byte(timeCompressedPackedSimple) << 4 + // 4 low bits are the log10 divisor + b[0] |= byte(math.Log10(float64(div))) + + // The first delta value + binary.BigEndian.PutUint64(b[1:9], uint64(dts[0])) + + // The compressed deltas + deltas, err := enc.Bytes() + if err != nil { + return nil, err + } + + return append(b, deltas...), nil +} + +func (e *encoder) encodeRaw() ([]byte, error) { + b := make([]byte, 1+len(e.ts)*8) + b[0] = byte(timeUncompressed) << 4 + for i, v := range e.ts { + binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], uint64(v)) + } + return b, nil +} + +func (e *encoder) encodeRLE(first, delta, div uint64, n int) ([]byte, error) { + // Large varints can take up to 10 bytes + b := make([]byte, 1+10*3) + + // 4 high bits used for the encoding type + b[0] = byte(timeCompressedRLE) << 4 + // 4 low bits are the log10 divisor + b[0] |= byte(math.Log10(float64(div))) + + i := 1 + // The first timestamp + binary.BigEndian.PutUint64(b[i:], uint64(first)) + i += 8 + // The first delta + i += binary.PutUvarint(b[i:], uint64(delta/div)) + // The number of times the delta is repeated + i += binary.PutUvarint(b[i:], uint64(n)) + + return b[:i], nil +} + +type decoder struct { + v time.Time + ts []uint64 + err error +} + +func NewTimeDecoder(b []byte) TimeDecoder { + d := &decoder{} + d.decode(b) + return d +} + +func (d *decoder) Next() bool { + if len(d.ts) == 0 { + 
return false + } + d.v = time.Unix(0, int64(d.ts[0])) + d.ts = d.ts[1:] + return true +} + +func (d *decoder) Read() time.Time { + return d.v +} + +func (d *decoder) Error() error { + return d.err +} + +func (d *decoder) decode(b []byte) { + if len(b) == 0 { + return + } + + // Encoding type is stored in the 4 high bits of the first byte + encoding := b[0] >> 4 + switch encoding { + case timeUncompressed: + d.decodeRaw(b[1:]) + case timeCompressedRLE: + d.decodeRLE(b) + case timeCompressedPackedSimple: + d.decodePacked(b) + default: + d.err = fmt.Errorf("unknown encoding: %v", encoding) + } +} + +func (d *decoder) decodePacked(b []byte) { + div := uint64(math.Pow10(int(b[0] & 0xF))) + first := uint64(binary.BigEndian.Uint64(b[1:9])) + + enc := simple8b.NewDecoder(b[9:]) + + deltas := []uint64{first} + for enc.Next() { + deltas = append(deltas, enc.Read()) + } + + // Compute the prefix sum and scale the deltas back up + for i := 1; i < len(deltas); i++ { + dgap := deltas[i] * div + deltas[i] = deltas[i-1] + dgap + } + + d.ts = deltas +} + +func (d *decoder) decodeRLE(b []byte) { + var i, n int + + // Lower 4 bits hold the 10 based exponent so we can scale the values back up + mod := int64(math.Pow10(int(b[i] & 0xF))) + i += 1 + + // Next 8 bytes is the starting timestamp + first := binary.BigEndian.Uint64(b[i : i+8]) + i += 8 + + // Next 1-10 bytes is our (scaled down by factor of 10) run length values + value, n := binary.Uvarint(b[i:]) + + // Scale the value back up + value *= uint64(mod) + i += n + + // Last 1-10 bytes is how many times the value repeats + count, n := binary.Uvarint(b[i:]) + + // Rebuild construct the original values now + deltas := make([]uint64, count) + for i := range deltas { + deltas[i] = value + } + + // Reverse the delta-encoding + deltas[0] = first + for i := 1; i < len(deltas); i++ { + deltas[i] = deltas[i-1] + deltas[i] + } + + d.ts = deltas +} + +func (d *decoder) decodeRaw(b []byte) { + d.ts = make([]uint64, len(b)/8) + for i := range 
d.ts { + d.ts[i] = binary.BigEndian.Uint64(b[i*8 : i*8+8]) + + delta := d.ts[i] + // Compute the prefix sum and scale the deltas back up + if i > 0 { + d.ts[i] = d.ts[i-1] + delta + } + } +} diff --git a/tsdb/engine/tsm1/timestamp_test.go b/tsdb/engine/tsm1/timestamp_test.go new file mode 100644 index 00000000000..402a6578a11 --- /dev/null +++ b/tsdb/engine/tsm1/timestamp_test.go @@ -0,0 +1,388 @@ +package tsm1 + +import ( + "testing" + "time" +) + +func Test_TimeEncoder(t *testing.T) { + enc := NewTimeEncoder() + + x := []time.Time{} + now := time.Unix(0, 0) + x = append(x, now) + enc.Write(now) + for i := 1; i < 4; i++ { + x = append(x, now.Add(time.Duration(i)*time.Second)) + enc.Write(x[i]) + } + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got := b[0] >> 4; got != timeCompressedPackedSimple { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + for i, v := range x { + if !dec.Next() { + t.Fatalf("Next == false, expected true") + } + + if v != dec.Read() { + t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) + } + } +} + +func Test_TimeEncoder_NoValues(t *testing.T) { + enc := NewTimeEncoder() + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := NewTimeDecoder(b) + if dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } +} + +func Test_TimeEncoder_One(t *testing.T) { + enc := NewTimeEncoder() + tm := time.Unix(0, 0) + + enc.Write(tm) + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got := b[0] >> 4; got != timeCompressedPackedSimple { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if tm != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), tm) + } +} + +func Test_TimeEncoder_Two(t 
*testing.T) { + enc := NewTimeEncoder() + t1 := time.Unix(0, 0) + t2 := time.Unix(0, 1) + enc.Write(t1) + enc.Write(t2) + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got := b[0] >> 4; got != timeCompressedPackedSimple { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) + } +} + +func Test_TimeEncoder_Three(t *testing.T) { + enc := NewTimeEncoder() + t1 := time.Unix(0, 0) + t2 := time.Unix(0, 1) + t3 := time.Unix(0, 2) + + enc.Write(t1) + enc.Write(t2) + enc.Write(t3) + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got := b[0] >> 4; got != timeCompressedPackedSimple { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t3 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t3) + } +} + +func Test_TimeEncoder_Large_Range(t *testing.T) { + enc := NewTimeEncoder() + t1 := time.Unix(0, 1442369134000000000) + t2 := time.Unix(0, 1442369135000000000) + enc.Write(t1) + enc.Write(t2) + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if 
got := b[0] >> 4; got != timeCompressedPackedSimple { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) + } +} + +func Test_TimeEncoder_Uncompressed(t *testing.T) { + enc := NewTimeEncoder() + t1 := time.Unix(0, 0) + t2 := time.Unix(1, 0) + + // about 36.5yrs in NS resolution is max range for compressed format + // This should cause the encoding to fallback to raw points + t3 := time.Unix(2, (2 << 59)) + enc.Write(t1) + enc.Write(t2) + enc.Write(t3) + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("expected error: %v", err) + } + + if exp := 25; len(b) != exp { + t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) + } + + if got := b[0] >> 4; got != timeUncompressed { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t1 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t2 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) + } + + if !dec.Next() { + t.Fatalf("unexpected next value: got true, exp false") + } + + if t3 != dec.Read() { + t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t3) + } +} + +func Test_TimeEncoder_RLE(t *testing.T) { + enc := NewTimeEncoder() + var ts []time.Time + for i := 0; i < 500; i++ { + ts = append(ts, time.Unix(int64(i), 0)) + } + + for _, v := range ts { + enc.Write(v) + } + + b, err := enc.Bytes() + if exp := 12; len(b) != exp { 
+ t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) + } + + if got := b[0] >> 4; got != timeCompressedRLE { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + dec := NewTimeDecoder(b) + for i, v := range ts { + if !dec.Next() { + t.Fatalf("Next == false, expected true") + } + + if v != dec.Read() { + t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) + } + } + + if dec.Next() { + t.Fatalf("unexpected extra values") + } +} + +func Test_TimeEncoder_Reverse(t *testing.T) { + enc := NewTimeEncoder() + ts := []time.Time{ + time.Unix(0, 3), + time.Unix(0, 2), + time.Unix(0, 1), + } + + for _, v := range ts { + enc.Write(v) + } + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got := b[0] >> 4; got != timeUncompressed { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + i := 0 + for dec.Next() { + if ts[i] != dec.Read() { + t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), ts[i]) + } + i += 1 + } +} + +func Test_TimeEncoder_220SecondDelta(t *testing.T) { + enc := NewTimeEncoder() + var ts []time.Time + now := time.Now() + for i := 0; i < 220; i++ { + ts = append(ts, now.Add(time.Duration(i*60)*time.Second)) + } + + for _, v := range ts { + enc.Write(v) + } + + b, err := enc.Bytes() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Using RLE, should get 12 bytes + if exp := 12; len(b) != exp { + t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) + } + + if got := b[0] >> 4; got != timeCompressedRLE { + t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) + } + + dec := NewTimeDecoder(b) + i := 0 + for dec.Next() { + if ts[i] != dec.Read() { + t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), ts[i]) + } + i += 1 + } + + if i != len(ts) { + t.Fatalf("Read too few values: exp %d, got %d", len(ts), 
i) + } + + if dec.Next() { + t.Fatalf("expecte Next() = false, got true") + } +} + +func BenchmarkTimeEncoder(b *testing.B) { + enc := NewTimeEncoder() + x := make([]time.Time, 1024) + for i := 0; i < len(x); i++ { + x[i] = time.Now() + enc.Write(x[i]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + enc.Bytes() + } +} + +func BenchmarkTimeDecoder(b *testing.B) { + x := make([]time.Time, 1024) + enc := NewTimeEncoder() + for i := 0; i < len(x); i++ { + x[i] = time.Now() + enc.Write(x[i]) + } + bytes, _ := enc.Bytes() + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + dec := NewTimeDecoder(bytes) + b.StartTimer() + for dec.Next() { + } + } +} diff --git a/tsdb/engine/tsm1/tsm1.go b/tsdb/engine/tsm1/tsm1.go new file mode 100644 index 00000000000..59a0f3fe051 --- /dev/null +++ b/tsdb/engine/tsm1/tsm1.go @@ -0,0 +1,1974 @@ +package tsm1 + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "hash/fnv" + "io" + "io/ioutil" + "log" + "math" + "os" + "path/filepath" + "reflect" + "sort" + "strings" + "sync" + "syscall" + "time" + + "github.com/golang/snappy" + "github.com/influxdb/influxdb/models" + "github.com/influxdb/influxdb/tsdb" +) + +const ( + // Format is the file format name of this engine. + Format = "tsm1" + + //IDsFileExtension is the extension for the file that keeps the compressed map + // of keys to uint64 IDs. + IDsFileExtension = "ids" + + // FieldsFileExtension is the extension for the file that stores compressed field + // encoding data for this db + FieldsFileExtension = "fields" + + // SeriesFileExtension is the extension for the file that stores the compressed + // series metadata for series in this db + SeriesFileExtension = "series" + + // CollisionsFileExtension is the extension for the file that keeps a map of which + // keys have hash collisions and what their actual IDs are + CollisionsFileExtension = "collisions" + + //CheckpointExtension is the extension given to files that checkpoint. 
+ // The checkpoint files are created when a new file is first created. They + // are removed after the file has been synced and is safe for use. If a file + // has an associated checkpoint file, it wasn't safely written and both should be removed + CheckpointExtension = "check" + + // keyFieldSeparator separates the series key from the field name in the composite key + // that identifies a specific field in series + keyFieldSeparator = "#!~#" +) + +type TimePrecision uint8 + +const ( + Seconds TimePrecision = iota + Milliseconds + Microseconds + Nanoseconds +) + +func init() { + tsdb.RegisterEngine(Format, NewEngine) +} + +const ( + MaxDataFileSize = 1024 * 1024 * 1024 // 1GB + + // DefaultRotateBlockSize is the default size to rotate to a new compressed block + DefaultRotateBlockSize = 512 * 1024 // 512KB + + DefaultRotateFileSize = 5 * 1024 * 1024 // 5MB + + DefaultMaxPointsPerBlock = 1000 + + // MAP_POPULATE is for the mmap syscall. For some reason this isn't defined in golang's syscall + MAP_POPULATE = 0x8000 + + // magicNumber is written as the first 4 bytes of a data file to + // identify the file as a tsm1 formatted file + magicNumber uint32 = 0x16D116D1 +) + +// Ensure Engine implements the interface. +var _ tsdb.Engine = &Engine{} + +// Engine represents a storage engine with compressed blocks. +type Engine struct { + writeLock *WriteLock + metaLock sync.Mutex + path string + logger *log.Logger + + // deletesPending mark how many old data files are waiting to be deleted. This will + // keep a close from returning until all deletes finish + deletesPending sync.WaitGroup + + // HashSeriesField is a function that takes a series key and a field name + // and returns a hash identifier. It's not guaranteed to be unique. 
+ HashSeriesField func(key string) uint64 + + WAL *Log + + RotateFileSize uint32 + SkipCompaction bool + CompactionAge time.Duration + MinCompactionFileCount int + IndexCompactionFullAge time.Duration + IndexMinCompactionInterval time.Duration + MaxPointsPerBlock int + RotateBlockSize int + + // filesLock is only for modifying and accessing the files slice + filesLock sync.RWMutex + files dataFiles + currentFileID int + compactionRunning bool + lastCompactionTime time.Time + + // deletes is a map of keys that are deleted, but haven't yet been + // compacted and flushed. They map the ID to the corresponding key + deletes map[uint64]string + + // deleteMeasurements is a map of the measurements that are deleted + // but haven't yet been compacted and flushed + deleteMeasurements map[string]bool + + collisionsLock sync.RWMutex + collisions map[string]uint64 + + // queryLock keeps data files from being deleted or the store from + // being closed while queries are running + queryLock sync.RWMutex +} + +// NewEngine returns a new instance of Engine. 
+func NewEngine(path string, walPath string, opt tsdb.EngineOptions) tsdb.Engine { + w := NewLog(path) + w.FlushColdInterval = time.Duration(opt.Config.WALFlushColdInterval) + w.FlushMemorySizeThreshold = opt.Config.WALFlushMemorySizeThreshold + w.MaxMemorySizeThreshold = opt.Config.WALMaxMemorySizeThreshold + w.LoggingEnabled = opt.Config.WALLoggingEnabled + + e := &Engine{ + path: path, + writeLock: &WriteLock{}, + logger: log.New(os.Stderr, "[tsm1] ", log.LstdFlags), + + // TODO: this is the function where we can inject a check against the in memory collisions + HashSeriesField: hashSeriesField, + WAL: w, + RotateFileSize: DefaultRotateFileSize, + CompactionAge: opt.Config.IndexCompactionAge, + MinCompactionFileCount: opt.Config.IndexMinCompactionFileCount, + IndexCompactionFullAge: opt.Config.IndexCompactionFullAge, + IndexMinCompactionInterval: opt.Config.IndexMinCompactionInterval, + MaxPointsPerBlock: DefaultMaxPointsPerBlock, + RotateBlockSize: DefaultRotateBlockSize, + } + e.WAL.Index = e + + return e +} + +// Path returns the path the engine was opened with. +func (e *Engine) Path() string { return e.path } + +// PerformMaintenance is for periodic maintenance of the store. 
A no-op for b1 +func (e *Engine) PerformMaintenance() { + if f := e.WAL.shouldFlush(); f != noFlush { + go func() { + e.WAL.flush(f) + }() + return + } + + // don't do a full compaction if the WAL received writes in the time window + if time.Since(e.WAL.LastWriteTime()) < e.IndexCompactionFullAge { + return + } + + e.filesLock.RLock() + running := e.compactionRunning + deletesPending := len(e.deletes) > 0 + e.filesLock.RUnlock() + if running || deletesPending { + return + } + + // do a full compaction if all the index files are older than the compaction time + for _, f := range e.copyFilesCollection() { + if time.Since(f.modTime) < e.IndexCompactionFullAge { + return + } + } + + go e.Compact(true) +} + +// Format returns the format type of this engine +func (e *Engine) Format() tsdb.EngineFormat { + return tsdb.TSM1Format +} + +// Open opens and initializes the engine. +func (e *Engine) Open() error { + if err := os.MkdirAll(e.path, 0777); err != nil { + return err + } + + // perform any cleanup on metafiles that were halfway written + e.cleanupMetafile(SeriesFileExtension) + e.cleanupMetafile(FieldsFileExtension) + e.cleanupMetafile(IDsFileExtension) + e.cleanupMetafile(CollisionsFileExtension) + + files, err := filepath.Glob(filepath.Join(e.path, fmt.Sprintf("*.%s", Format))) + if err != nil { + return err + } + for _, fn := range files { + // if the file has a checkpoint it's not valid, so remove it + if removed := e.removeFileIfCheckpointExists(fn); removed { + continue + } + + id, err := idFromFileName(fn) + if err != nil { + return err + } + if id >= e.currentFileID { + e.currentFileID = id + 1 + } + f, err := os.OpenFile(fn, os.O_RDONLY, 0666) + if err != nil { + return fmt.Errorf("error opening file %s: %s", fn, err.Error()) + } + df, err := NewDataFile(f) + if err != nil { + return fmt.Errorf("error opening memory map for file %s: %s", fn, err.Error()) + } + e.files = append(e.files, df) + } + sort.Sort(e.files) + + if err := e.readCollisions(); err != nil 
{ + return err + } + + e.deletes = make(map[uint64]string) + e.deleteMeasurements = make(map[string]bool) + + // mark the last compaction as now so it doesn't try to compact while + // flushing the WAL on load + e.lastCompactionTime = time.Now() + + if err := e.WAL.Open(); err != nil { + return err + } + + e.lastCompactionTime = time.Now() + + return nil +} + +// Close closes the engine. +func (e *Engine) Close() error { + // get all the locks so queries, writes, and compactions stop before closing + e.queryLock.Lock() + defer e.queryLock.Unlock() + e.metaLock.Lock() + defer e.metaLock.Unlock() + min, max := int64(math.MinInt64), int64(math.MaxInt64) + e.writeLock.LockRange(min, max) + defer e.writeLock.UnlockRange(min, max) + e.filesLock.Lock() + defer e.filesLock.Unlock() + + // ensure all deletes have been processed + e.deletesPending.Wait() + + for _, df := range e.files { + _ = df.Close() + } + e.files = nil + e.currentFileID = 0 + e.collisions = nil + e.deletes = nil + e.deleteMeasurements = nil + return nil +} + +// DataFileCount returns the number of data files in the database +func (e *Engine) DataFileCount() int { + e.filesLock.RLock() + defer e.filesLock.RUnlock() + return len(e.files) +} + +// SetLogOutput is a no-op. +func (e *Engine) SetLogOutput(w io.Writer) {} + +// LoadMetadataIndex loads the shard metadata into memory. 
+func (e *Engine) LoadMetadataIndex(shard *tsdb.Shard, index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { + // Load measurement metadata + fields, err := e.readFields() + if err != nil { + return err + } + for k, mf := range fields { + m := index.CreateMeasurementIndexIfNotExists(string(k)) + for name, _ := range mf.Fields { + m.SetFieldName(name) + } + mf.Codec = tsdb.NewFieldCodec(mf.Fields) + measurementFields[m.Name] = mf + } + + // Load series metadata + series, err := e.readSeries() + if err != nil { + return err + } + + // Load the series into the in-memory index in sorted order to ensure + // it's always consistent for testing purposes + a := make([]string, 0, len(series)) + for k, _ := range series { + a = append(a, k) + } + sort.Strings(a) + for _, key := range a { + s := series[key] + s.InitializeShards() + index.CreateSeriesIndexIfNotExists(tsdb.MeasurementFromSeriesKey(string(key)), s) + } + + return nil +} + +// WritePoints writes metadata and point data into the engine. +// Returns an error if new points are added to an existing key. 
+func (e *Engine) WritePoints(points []models.Point, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error { + return e.WAL.WritePoints(points, measurementFieldsToSave, seriesToCreate) +} + +func (e *Engine) Write(pointsByKey map[string]Values, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error { + // Flush any deletes before writing new data from the WAL + e.filesLock.RLock() + hasDeletes := len(e.deletes) > 0 + e.filesLock.RUnlock() + if hasDeletes { + e.flushDeletes() + } + + err, startTime, endTime, valuesByID := e.convertKeysAndWriteMetadata(pointsByKey, measurementFieldsToSave, seriesToCreate) + if err != nil { + return err + } + if len(valuesByID) == 0 { + return nil + } + + files, lockStart, lockEnd := e.filesAndLock(startTime, endTime) + defer e.writeLock.UnlockRange(lockStart, lockEnd) + + if len(files) == 0 { + return e.rewriteFile(nil, valuesByID) + } + + maxTime := int64(math.MaxInt64) + + // do the file rewrites in parallel + var mu sync.Mutex + var writes sync.WaitGroup + var errors []error + + // reverse through the data files and write in the data + for i := len(files) - 1; i >= 0; i-- { + f := files[i] + // max times are exclusive, so add 1 to it + fileMax := f.MaxTime() + 1 + fileMin := f.MinTime() + // if the file is < rotate, write all data between fileMin and maxTime + if f.size < e.RotateFileSize { + writes.Add(1) + go func(df *dataFile, vals map[uint64]Values) { + if err := e.rewriteFile(df, vals); err != nil { + mu.Lock() + errors = append(errors, err) + mu.Unlock() + } + writes.Done() + }(f, e.filterDataBetweenTimes(valuesByID, fileMin, maxTime)) + continue + } + // if the file is > rotate: + // write all data between fileMax and maxTime into new file + // write all data between fileMin and fileMax into old file + writes.Add(1) + go func(vals map[uint64]Values) { + if err := e.rewriteFile(nil, vals); err != nil { + mu.Lock() + errors = 
append(errors, err) + mu.Unlock() + } + writes.Done() + }(e.filterDataBetweenTimes(valuesByID, fileMax, maxTime)) + writes.Add(1) + go func(df *dataFile, vals map[uint64]Values) { + if err := e.rewriteFile(df, vals); err != nil { + mu.Lock() + errors = append(errors, err) + mu.Unlock() + } + writes.Done() + }(f, e.filterDataBetweenTimes(valuesByID, fileMin, fileMax)) + maxTime = fileMin + } + // for any data leftover, write into a new file since it's all older + // than any file we currently have + writes.Add(1) + go func() { + if err := e.rewriteFile(nil, valuesByID); err != nil { + mu.Lock() + errors = append(errors, err) + mu.Unlock() + } + writes.Done() + }() + + writes.Wait() + + if len(errors) > 0 { + // TODO: log errors + return errors[0] + } + + if !e.SkipCompaction && e.shouldCompact() { + go e.Compact(false) + } + + return nil +} + +// MarkDeletes will mark the given keys for deletion in memory. They will be deleted from data +// files on the next flush. This mainly for the WAL to use on startup +func (e *Engine) MarkDeletes(keys []string) { + e.filesLock.Lock() + defer e.filesLock.Unlock() + for _, k := range keys { + e.deletes[e.keyToID(k)] = k + } +} + +func (e *Engine) MarkMeasurementDelete(name string) { + e.filesLock.Lock() + defer e.filesLock.Unlock() + e.deleteMeasurements[name] = true +} + +// filesAndLock returns the data files that match the given range and +// ensures that the write lock will hold for the entire range +func (e *Engine) filesAndLock(min, max int64) (a dataFiles, lockStart, lockEnd int64) { + for { + a = make([]*dataFile, 0) + files := e.copyFilesCollection() + + for _, f := range e.files { + fmin, fmax := f.MinTime(), f.MaxTime() + if min < fmax && fmin >= fmin { + a = append(a, f) + } else if max >= fmin && max < fmax { + a = append(a, f) + } + } + + if len(a) > 0 { + lockStart = a[0].MinTime() + lockEnd = a[len(a)-1].MaxTime() + if max > lockEnd { + lockEnd = max + } + } else { + lockStart = min + lockEnd = max + } + + 
e.writeLock.LockRange(lockStart, lockEnd) + + // it's possible for compaction to change the files collection while we + // were waiting for a write lock on the range. Make sure the files are still the + // same after we got the lock, otherwise try again. This shouldn't happen often. + filesAfterLock := e.copyFilesCollection() + if reflect.DeepEqual(files, filesAfterLock) { + return + } + + e.writeLock.UnlockRange(lockStart, lockEnd) + } +} + +func (e *Engine) Compact(fullCompaction bool) error { + // we're looping here to ensure that the files we've marked to compact are + // still there after we've obtained the write lock + var minTime, maxTime int64 + var files dataFiles + for { + if fullCompaction { + files = e.copyFilesCollection() + } else { + files = e.filesToCompact() + } + if len(files) < 2 { + return nil + } + minTime = files[0].MinTime() + maxTime = files[len(files)-1].MaxTime() + + e.writeLock.LockRange(minTime, maxTime) + + // if the files are different after obtaining the write lock, one or more + // was rewritten. Release the lock and try again. This shouldn't happen really. 
+ var filesAfterLock dataFiles + if fullCompaction { + filesAfterLock = e.copyFilesCollection() + } else { + filesAfterLock = e.filesToCompact() + } + if !reflect.DeepEqual(files, filesAfterLock) { + e.writeLock.UnlockRange(minTime, maxTime) + continue + } + + // we've got the write lock and the files are all there + break + } + + // mark the compaction as running + e.filesLock.Lock() + if e.compactionRunning { + e.filesLock.Unlock() + return nil + } + e.compactionRunning = true + e.filesLock.Unlock() + defer func() { + //release the lock + e.writeLock.UnlockRange(minTime, maxTime) + e.filesLock.Lock() + e.lastCompactionTime = time.Now() + e.compactionRunning = false + e.filesLock.Unlock() + }() + + var s string + if fullCompaction { + s = "FULL " + } + fileName := e.nextFileName() + e.logger.Printf("Starting %scompaction in partition %s of %d files to new file %s", s, e.path, len(files), fileName) + st := time.Now() + + positions := make([]uint32, len(files)) + ids := make([]uint64, len(files)) + + // initilaize for writing + f, err := e.openFileAndCheckpoint(fileName) + + for i, df := range files { + ids[i] = btou64(df.mmap[4:12]) + positions[i] = 4 + } + currentPosition := uint32(fileHeaderSize) + newPositions := make([]uint32, 0) + newIDs := make([]uint64, 0) + buf := make([]byte, e.RotateBlockSize) + for { + // find the min ID so we can write it to the file + minID := uint64(math.MaxUint64) + for _, id := range ids { + if minID > id && id != 0 { + minID = id + } + } + if minID == math.MaxUint64 { // we've emptied all the files + break + } + + newIDs = append(newIDs, minID) + newPositions = append(newPositions, currentPosition) + + // write the blocks in order from the files with this id. 
as we + // go merge blocks together from one file to another, if the right size + var previousValues Values + for i, id := range ids { + if id != minID { + continue + } + df := files[i] + pos := positions[i] + fid, _, block := df.block(pos) + if fid != id { + panic("not possible") + } + newPos := pos + uint32(blockHeaderSize+len(block)) + positions[i] = newPos + + // write the blocks out to file that are already at their size limit + for { + // write the values, the block or combine with previous + if len(previousValues) > 0 { + previousValues = append(previousValues, previousValues.DecodeSameTypeBlock(block)...) + } else if len(block) > e.RotateBlockSize { + if _, err := f.Write(df.mmap[pos:newPos]); err != nil { + return err + } + currentPosition += uint32(newPos - pos) + } else { + // TODO: handle decode error + previousValues, _ = DecodeBlock(block) + } + + // write the previous values and clear if we've hit the limit + if len(previousValues) > e.MaxPointsPerBlock { + b, err := previousValues.Encode(buf) + if err != nil { + panic(fmt.Sprintf("failure encoding block: %v", err)) + } + + if err := e.writeBlock(f, id, b); err != nil { + // fail hard. 
If we can't write a file someone needs to get woken up + panic(fmt.Sprintf("failure writing block: %s", err.Error())) + } + currentPosition += uint32(blockHeaderSize + len(b)) + previousValues = nil + } + + // if the next block is the same ID, we don't need to decode this one + // so we can just write it out to the file + nextID, _, nextBlock := df.block(newPos) + + // move to the next block in this file only if the id is the same + if nextID != id { + // flush remaining values + if len(previousValues) > 0 { + b, err := previousValues.Encode(buf) + if err != nil { + panic(fmt.Sprintf("failure encoding block: %v", err)) + } + currentPosition += uint32(blockHeaderSize + len(b)) + previousValues = nil + if err := e.writeBlock(f, id, b); err != nil { + panic(fmt.Sprintf("error writing file %s: %s", f.Name(), err.Error())) + } + } + ids[i] = nextID + break + } + pos = newPos + newPos = pos + uint32(blockHeaderSize+len(nextBlock)) + positions[i] = newPos + block = nextBlock + } + } + + if len(previousValues) > 0 { + b, err := previousValues.Encode(buf) + if err != nil { + panic(fmt.Sprintf("failure encoding block: %v", err)) + } + + if err := e.writeBlock(f, minID, b); err != nil { + // fail hard. 
If we can't write a file someone needs to get woken up + panic(fmt.Sprintf("failure writing block: %s", err.Error())) + } + currentPosition += uint32(blockHeaderSize + len(b)) + } + } + + newDF, err := e.writeIndexAndGetDataFile(f, minTime, maxTime, newIDs, newPositions) + if err != nil { + return err + } + + // update engine with new file pointers + e.filesLock.Lock() + var newFiles dataFiles + for _, df := range e.files { + // exclude any files that were compacted + include := true + for _, f := range files { + if f == df { + include = false + break + } + } + if include { + newFiles = append(newFiles, df) + } + } + newFiles = append(newFiles, newDF) + sort.Sort(newFiles) + e.files = newFiles + e.filesLock.Unlock() + + e.logger.Printf("Compaction of %s took %s", e.path, time.Since(st)) + + // delete the old files in a goroutine so running queries won't block the write + // from completing + e.deletesPending.Add(1) + go func() { + for _, f := range files { + if err := f.Delete(); err != nil { + e.logger.Println("ERROR DELETING:", f.f.Name()) + } + } + e.deletesPending.Done() + }() + + return nil +} + +func (e *Engine) writeBlock(f *os.File, id uint64, block []byte) error { + if _, err := f.Write(append(u64tob(id), u32tob(uint32(len(block)))...)); err != nil { + return err + } + _, err := f.Write(block) + return err +} + +func (e *Engine) writeIndexAndGetDataFile(f *os.File, minTime, maxTime int64, ids []uint64, newPositions []uint32) (*dataFile, error) { + // write the file index, starting with the series ids and their positions + for i, id := range ids { + if _, err := f.Write(u64tob(id)); err != nil { + return nil, err + } + if _, err := f.Write(u32tob(newPositions[i])); err != nil { + return nil, err + } + } + + // write the min time, max time + if _, err := f.Write(append(u64tob(uint64(minTime)), u64tob(uint64(maxTime))...)); err != nil { + return nil, err + } + + // series count + if _, err := f.Write(u32tob(uint32(len(ids)))); err != nil { + return nil, err + 
} + + // sync it and see4k back to the beginning to hand off to the mmap + if err := f.Sync(); err != nil { + return nil, err + } + if _, err := f.Seek(0, 0); err != nil { + return nil, err + } + + if err := e.removeCheckpoint(f.Name()); err != nil { + return nil, err + } + + // now open it as a memory mapped data file + newDF, err := NewDataFile(f) + if err != nil { + return nil, err + } + + return newDF, nil +} + +func (e *Engine) shouldCompact() bool { + e.filesLock.RLock() + running := e.compactionRunning + since := time.Since(e.lastCompactionTime) + deletesPending := len(e.deletes) > 0 + e.filesLock.RUnlock() + if running || since < e.IndexMinCompactionInterval || deletesPending { + return false + } + return len(e.filesToCompact()) >= e.MinCompactionFileCount +} + +func (e *Engine) filesToCompact() dataFiles { + e.filesLock.RLock() + defer e.filesLock.RUnlock() + + var a dataFiles + for _, df := range e.files { + if time.Since(df.modTime) > e.CompactionAge && df.size < MaxDataFileSize { + a = append(a, df) + } else if len(a) > 0 { + // only compact contiguous ranges. 
If we hit the negative case and + // there are files to compact, stop here + break + } + } + return a +} + +func (e *Engine) convertKeysAndWriteMetadata(pointsByKey map[string]Values, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) (err error, minTime, maxTime int64, valuesByID map[uint64]Values) { + e.metaLock.Lock() + defer e.metaLock.Unlock() + + if err := e.writeNewFields(measurementFieldsToSave); err != nil { + return err, 0, 0, nil + } + if err := e.writeNewSeries(seriesToCreate); err != nil { + return err, 0, 0, nil + } + + if len(pointsByKey) == 0 { + return nil, 0, 0, nil + } + + // read in keys and assign any that aren't defined + b, err := e.readCompressedFile(IDsFileExtension) + if err != nil { + return err, 0, 0, nil + } + ids := make(map[string]uint64) + if b != nil { + if err := json.Unmarshal(b, &ids); err != nil { + return err, 0, 0, nil + } + } + + // these are values that are newer than anything stored in the shard + valuesByID = make(map[uint64]Values) + + idToKey := make(map[uint64]string) // we only use this map if new ids are being created + collisions := make(map[string]uint64) // we only use this if a collision is encountered + newKeys := false + // track the min and max time of values being inserted so we can lock that time range + minTime = int64(math.MaxInt64) + maxTime = int64(math.MinInt64) + for k, values := range pointsByKey { + var id uint64 + var ok bool + if id, ok = ids[k]; !ok { + // populate the map if we haven't already + + if len(idToKey) == 0 { + for n, id := range ids { + idToKey[id] = n + } + } + + // now see if the hash id collides with a different key + hashID := e.HashSeriesField(k) + existingKey, idInMap := idToKey[hashID] + // we only care if the keys are different. 
if so, it's a hash collision we have to keep track of + if idInMap && k != existingKey { + // we have a collision, find this new key the next available id + hashID = 0 + for { + hashID++ + if _, ok := idToKey[hashID]; !ok { + // next ID is available, use it + break + } + } + collisions[k] = hashID + } + + newKeys = true + ids[k] = hashID + idToKey[hashID] = k + id = hashID + } + + if minTime > values.MinTime() { + minTime = values.MinTime() + } + if maxTime < values.MaxTime() { + maxTime = values.MaxTime() + } + + valuesByID[id] = values + } + + if newKeys { + b, err := json.Marshal(ids) + if err != nil { + return err, 0, 0, nil + } + if err := e.replaceCompressedFile(IDsFileExtension, b); err != nil { + return err, 0, 0, nil + } + } + + if len(collisions) > 0 { + e.saveNewCollisions(collisions) + } + + return +} + +func (e *Engine) saveNewCollisions(collisions map[string]uint64) error { + e.collisionsLock.Lock() + defer e.collisionsLock.Unlock() + + for k, v := range collisions { + e.collisions[k] = v + } + + data, err := json.Marshal(e.collisions) + + if err != nil { + return err + } + + return e.replaceCompressedFile(CollisionsFileExtension, data) +} + +func (e *Engine) readCollisions() error { + e.collisions = make(map[string]uint64) + data, err := e.readCompressedFile(CollisionsFileExtension) + if err != nil { + return err + } + + if len(data) == 0 { + return nil + } + + return json.Unmarshal(data, &e.collisions) +} + +// filterDataBetweenTimes will create a new map with data between +// the minTime (inclusive) and maxTime (exclusive) while removing that +// data from the passed in map. 
It is assume that the Values arrays +// are sorted in time ascending order +func (e *Engine) filterDataBetweenTimes(valuesByID map[uint64]Values, minTime, maxTime int64) map[uint64]Values { + filteredValues := make(map[uint64]Values) + for id, values := range valuesByID { + maxIndex := len(values) + minIndex := -1 + // find the index of the first value in the range + for i, v := range values { + t := v.UnixNano() + if t >= minTime && t < maxTime { + minIndex = i + break + } + } + if minIndex == -1 { + continue + } + // go backwards to find the index of the last value in the range + for i := len(values) - 1; i >= 0; i-- { + t := values[i].UnixNano() + if t < maxTime { + maxIndex = i + 1 + break + } + } + + // write into the result map and filter the passed in map + filteredValues[id] = values[minIndex:maxIndex] + + // if we grabbed all the values, remove them from the passed in map + if minIndex == len(values) || (minIndex == 0 && maxIndex == len(values)) { + delete(valuesByID, id) + continue + } + + valuesByID[id] = values[0:minIndex] + if maxIndex < len(values) { + valuesByID[id] = append(valuesByID[id], values[maxIndex:]...) 
+ } + } + return filteredValues +} + +// rewriteFile will read in the old data file, if provided and merge the values +// in the passed map into a new data file +func (e *Engine) rewriteFile(oldDF *dataFile, valuesByID map[uint64]Values) error { + if len(valuesByID) == 0 { + return nil + } + + // we need the values in sorted order so that we can merge them into the + // new file as we read the old file + ids := make([]uint64, 0, len(valuesByID)) + for id, _ := range valuesByID { + ids = append(ids, id) + } + + minTime := int64(math.MaxInt64) + maxTime := int64(math.MinInt64) + + // read header of ids to starting positions and times + oldIDToPosition := make(map[uint64]uint32) + if oldDF != nil { + oldIDToPosition = oldDF.IDToPosition() + minTime = oldDF.MinTime() + maxTime = oldDF.MaxTime() + } + + for _, v := range valuesByID { + if minTime > v.MinTime() { + minTime = v.MinTime() + } + if maxTime < v.MaxTime() { + // add 1 ns to the time since maxTime is exclusive + maxTime = v.MaxTime() + 1 + } + } + + // add any ids that are in the file that aren't getting flushed here + for id, _ := range oldIDToPosition { + if _, ok := valuesByID[id]; !ok { + ids = append(ids, id) + } + } + + // always write in order by ID + sort.Sort(uint64slice(ids)) + + f, err := e.openFileAndCheckpoint(e.nextFileName()) + if err != nil { + return err + } + + if oldDF == nil { + e.logger.Printf("writing new index file %s", f.Name()) + } else { + e.logger.Printf("rewriting index file %s with %s", oldDF.f.Name(), f.Name()) + } + + // now combine the old file data with the new values, keeping track of + // their positions + currentPosition := uint32(fileHeaderSize) + newPositions := make([]uint32, len(ids)) + buf := make([]byte, e.MaxPointsPerBlock*20) + for i, id := range ids { + // mark the position for this ID + newPositions[i] = currentPosition + + newVals := valuesByID[id] + + // if this id is only in the file and not in the new values, just copy over from old file + if len(newVals) == 0 
{ + fpos := oldIDToPosition[id] + + // write the blocks until we hit whatever the next id is + for { + fid := btou64(oldDF.mmap[fpos : fpos+8]) + if fid != id { + break + } + length := btou32(oldDF.mmap[fpos+8 : fpos+12]) + if _, err := f.Write(oldDF.mmap[fpos : fpos+12+length]); err != nil { + f.Close() + return err + } + fpos += (12 + length) + currentPosition += (12 + length) + + // make sure we're not at the end of the file + if fpos >= oldDF.size { + break + } + } + + continue + } + + // if the values are not in the file, just write the new ones + fpos, ok := oldIDToPosition[id] + if !ok { + // TODO: ensure we encode only the amount in a block + block, err := newVals.Encode(buf) + if err != nil { + f.Close() + return err + } + + if err := e.writeBlock(f, id, block); err != nil { + f.Close() + return err + } + currentPosition += uint32(blockHeaderSize + len(block)) + + continue + } + + // it's in the file and the new values, combine them and write out + for { + fid, _, block := oldDF.block(fpos) + if fid != id { + break + } + fpos += uint32(blockHeaderSize + len(block)) + + // determine if there's a block after this with the same id and get its time + nextID, nextTime, _ := oldDF.block(fpos) + hasFutureBlock := nextID == id + + nv, newBlock, err := e.DecodeAndCombine(newVals, block, buf[:0], nextTime, hasFutureBlock) + newVals = nv + if err != nil { + return err + } + if _, err := f.Write(append(u64tob(id), u32tob(uint32(len(newBlock)))...)); err != nil { + f.Close() + return err + } + if _, err := f.Write(newBlock); err != nil { + f.Close() + return err + } + + currentPosition += uint32(blockHeaderSize + len(newBlock)) + + if fpos >= oldDF.indexPosition() { + break + } + } + + // TODO: ensure we encode only the amount in a block, refactor this wil line 450 into func + if len(newVals) > 0 { + // TODO: ensure we encode only the amount in a block + block, err := newVals.Encode(buf) + if err != nil { + f.Close() + return err + } + + if _, err := 
f.Write(append(u64tob(id), u32tob(uint32(len(block)))...)); err != nil { + f.Close() + return err + } + if _, err := f.Write(block); err != nil { + f.Close() + return err + } + currentPosition += uint32(blockHeaderSize + len(block)) + } + } + + newDF, err := e.writeIndexAndGetDataFile(f, minTime, maxTime, ids, newPositions) + if err != nil { + f.Close() + return err + } + + // update the engine to point at the new dataFiles + e.filesLock.Lock() + var files dataFiles + for _, df := range e.files { + if df != oldDF { + files = append(files, df) + } + } + files = append(files, newDF) + sort.Sort(files) + e.files = files + e.filesLock.Unlock() + + // remove the old data file. no need to block returning the write, + // but we need to let any running queries finish before deleting it + if oldDF != nil { + e.deletesPending.Add(1) + go func() { + if err := oldDF.Delete(); err != nil { + e.logger.Println("ERROR DELETING FROM REWRITE:", oldDF.f.Name()) + } + e.deletesPending.Done() + }() + } + + return nil +} + +// flushDeletes will lock the entire shard and rewrite all index files so they no +// longer contain the flushed IDs +func (e *Engine) flushDeletes() error { + e.writeLock.LockRange(math.MinInt64, math.MaxInt64) + defer e.writeLock.UnlockRange(math.MinInt64, math.MaxInt64) + e.metaLock.Lock() + defer e.metaLock.Unlock() + + measurements := make(map[string]bool) + deletes := make(map[uint64]string) + e.filesLock.RLock() + for name, _ := range e.deleteMeasurements { + measurements[name] = true + } + for id, key := range e.deletes { + deletes[id] = key + } + e.filesLock.RUnlock() + + // if we're deleting measurements, rewrite the field data + if len(measurements) > 0 { + fields, err := e.readFields() + if err != nil { + return err + } + for name, _ := range measurements { + delete(fields, name) + } + if err := e.writeFields(fields); err != nil { + return err + } + } + + series, err := e.readSeries() + if err != nil { + return err + } + for _, key := range deletes { + 
seriesName, _ := seriesAndFieldFromCompositeKey(key) + delete(series, seriesName) + } + if err := e.writeSeries(series); err != nil { + return err + } + + // now remove the raw time series data from the data files + files := e.copyFilesCollection() + newFiles := make(dataFiles, 0, len(files)) + for _, f := range files { + newFiles = append(newFiles, e.writeNewFileExcludeDeletes(f)) + } + + // update the delete map and files + e.filesLock.Lock() + defer e.filesLock.Unlock() + + e.files = newFiles + + // remove the things we've deleted from the map + for name, _ := range measurements { + delete(e.deleteMeasurements, name) + } + for id, _ := range deletes { + delete(e.deletes, id) + } + + e.deletesPending.Add(1) + go func() { + for _, oldDF := range files { + if err := oldDF.Delete(); err != nil { + e.logger.Println("ERROR DELETING FROM REWRITE:", oldDF.f.Name()) + } + } + e.deletesPending.Done() + }() + return nil +} + +func (e *Engine) writeNewFileExcludeDeletes(oldDF *dataFile) *dataFile { + f, err := e.openFileAndCheckpoint(e.nextFileName()) + if err != nil { + panic(fmt.Sprintf("error opening new data file: %s", err.Error())) + } + + ids := make([]uint64, 0) + positions := make([]uint32, 0) + + indexPosition := oldDF.indexPosition() + currentPosition := uint32(fileHeaderSize) + currentID := uint64(0) + for currentPosition < indexPosition { + id := btou64(oldDF.mmap[currentPosition : currentPosition+8]) + length := btou32(oldDF.mmap[currentPosition+8 : currentPosition+blockHeaderSize]) + newPosition := currentPosition + blockHeaderSize + length + + if _, ok := e.deletes[id]; ok { + currentPosition = newPosition + continue + } + + if _, err := f.Write(oldDF.mmap[currentPosition:newPosition]); err != nil { + panic(fmt.Sprintf("error writing new index file: %s", err.Error())) + } + if id != currentID { + currentID = id + ids = append(ids, id) + positions = append(positions, currentPosition) + } + currentPosition = newPosition + } + + df, err := 
e.writeIndexAndGetDataFile(f, oldDF.MinTime(), oldDF.MaxTime(), ids, positions)
+	if err != nil {
+		panic(fmt.Sprintf("error writing new index file: %s", err.Error()))
+	}
+
+	return df
+}
+
+func (e *Engine) nextFileName() string {
+	e.filesLock.Lock()
+	defer e.filesLock.Unlock()
+	e.currentFileID++
+	return filepath.Join(e.path, fmt.Sprintf("%07d.%s", e.currentFileID, Format))
+}
+
+func (e *Engine) readCompressedFile(name string) ([]byte, error) {
+	f, err := os.OpenFile(filepath.Join(e.path, name), os.O_RDONLY, 0666)
+	if os.IsNotExist(err) {
+		return nil, nil
+	} else if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	b, err := ioutil.ReadAll(f)
+	if err != nil {
+		return nil, err
+	}
+
+	data, err := snappy.Decode(nil, b)
+	if err != nil {
+		return nil, err
+	}
+	return data, nil
+}
+
+func (e *Engine) replaceCompressedFile(name string, data []byte) error {
+	tmpName := filepath.Join(e.path, name+"tmp")
+	f, err := os.OpenFile(tmpName, os.O_CREATE|os.O_RDWR, 0666)
+	if err != nil {
+		return err
+	}
+	b := snappy.Encode(nil, data)
+	if _, err := f.Write(b); err != nil {
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+	// remove the file from the shard path, not a bare relative name
+	if err := os.Remove(filepath.Join(e.path, name)); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return os.Rename(tmpName, filepath.Join(e.path, name))
+}
+
+// keysWithFields takes the map of measurements to their fields and a set of series keys
+// and returns the columnar keys for the keys and fields
+func (e *Engine) keysWithFields(fields map[string]*tsdb.MeasurementFields, keys []string) []string {
+	e.WAL.cacheLock.RLock()
+	defer e.WAL.cacheLock.RUnlock()
+
+	a := make([]string, 0)
+	for _, k := range keys {
+		measurement := tsdb.MeasurementFromSeriesKey(k)
+
+		// add the fields from the index
+		mf := fields[measurement]
+		if mf != nil {
+			for _, f := range mf.Fields {
+				a = append(a, SeriesFieldKey(k, f.Name))
+			}
+		}
+
+		// now add any fields from the WAL that haven't been flushed yet
+		mf = e.WAL.measurementFieldsCache[measurement]
+		if
mf != nil { + for _, f := range mf.Fields { + a = append(a, SeriesFieldKey(k, f.Name)) + } + } + } + + return a +} + +// DeleteSeries deletes the series from the engine. +func (e *Engine) DeleteSeries(seriesKeys []string) error { + e.metaLock.Lock() + defer e.metaLock.Unlock() + + fields, err := e.readFields() + if err != nil { + return err + } + + keyFields := e.keysWithFields(fields, seriesKeys) + e.filesLock.Lock() + defer e.filesLock.Unlock() + for _, key := range keyFields { + e.deletes[e.keyToID(key)] = key + } + + return e.WAL.DeleteSeries(keyFields) +} + +// DeleteMeasurement deletes a measurement and all related series. +func (e *Engine) DeleteMeasurement(name string, seriesKeys []string) error { + e.metaLock.Lock() + defer e.metaLock.Unlock() + + fields, err := e.readFields() + if err != nil { + return err + } + + // mark the measurement, series keys and the fields for deletion on the next flush + // also serves as a tombstone for any queries that come in before the flush + keyFields := e.keysWithFields(fields, seriesKeys) + e.filesLock.Lock() + defer e.filesLock.Unlock() + + e.deleteMeasurements[name] = true + for _, k := range keyFields { + e.deletes[e.keyToID(k)] = k + } + + return e.WAL.DeleteMeasurement(name, seriesKeys) +} + +// SeriesCount returns the number of series buckets on the shard. +func (e *Engine) SeriesCount() (n int, err error) { + return 0, nil +} + +// Begin starts a new transaction on the engine. 
+func (e *Engine) Begin(writable bool) (tsdb.Tx, error) { + e.queryLock.RLock() + + var files dataFiles + + // we do this to ensure that the data files haven't been deleted from a compaction + // while we were waiting to get the query lock + for { + files = e.copyFilesCollection() + + // get the query lock + for _, f := range files { + f.mu.RLock() + } + + // ensure they're all still open + reset := false + for _, f := range files { + if f.f == nil { + reset = true + break + } + } + + // if not, release and try again + if reset { + for _, f := range files { + f.mu.RUnlock() + } + continue + } + + // we're good to go + break + } + + return &tx{files: files, engine: e}, nil +} + +func (e *Engine) WriteTo(w io.Writer) (n int64, err error) { panic("not implemented") } + +func (e *Engine) keyToID(key string) uint64 { + // get the ID for the key and be sure to check if it had hash collision before + e.collisionsLock.RLock() + id, ok := e.collisions[key] + e.collisionsLock.RUnlock() + + if !ok { + id = e.HashSeriesField(key) + } + return id +} + +func (e *Engine) keyAndFieldToID(series, field string) uint64 { + key := SeriesFieldKey(series, field) + return e.keyToID(key) +} + +func (e *Engine) copyFilesCollection() []*dataFile { + e.filesLock.RLock() + defer e.filesLock.RUnlock() + a := make([]*dataFile, len(e.files)) + copy(a, e.files) + return a +} + +func (e *Engine) writeNewFields(measurementFieldsToSave map[string]*tsdb.MeasurementFields) error { + if len(measurementFieldsToSave) == 0 { + return nil + } + + // read in all the previously saved fields + fields, err := e.readFields() + if err != nil { + return err + } + + // add the new ones or overwrite old ones + for name, mf := range measurementFieldsToSave { + fields[name] = mf + } + + return e.writeFields(fields) +} + +func (e *Engine) writeFields(fields map[string]*tsdb.MeasurementFields) error { + // compress and save everything + data, err := json.Marshal(fields) + if err != nil { + return err + } + + fn := 
filepath.Join(e.path, FieldsFileExtension+"tmp") + ff, err := os.OpenFile(fn, os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return err + } + _, err = ff.Write(snappy.Encode(nil, data)) + if err != nil { + return err + } + if err := ff.Close(); err != nil { + return err + } + fieldsFileName := filepath.Join(e.path, FieldsFileExtension) + + if _, err := os.Stat(fieldsFileName); !os.IsNotExist(err) { + if err := os.Remove(fieldsFileName); err != nil { + return err + } + } + + return os.Rename(fn, fieldsFileName) +} + +func (e *Engine) readFields() (map[string]*tsdb.MeasurementFields, error) { + fields := make(map[string]*tsdb.MeasurementFields) + + f, err := os.OpenFile(filepath.Join(e.path, FieldsFileExtension), os.O_RDONLY, 0666) + if os.IsNotExist(err) { + return fields, nil + } else if err != nil { + return nil, err + } + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + + data, err := snappy.Decode(nil, b) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(data, &fields); err != nil { + return nil, err + } + + return fields, nil +} + +func (e *Engine) writeNewSeries(seriesToCreate []*tsdb.SeriesCreate) error { + if len(seriesToCreate) == 0 { + return nil + } + + // read in previously saved series + series, err := e.readSeries() + if err != nil { + return err + } + + // add new ones, compress and save + for _, s := range seriesToCreate { + series[s.Series.Key] = s.Series + } + + return e.writeSeries(series) +} + +func (e *Engine) writeSeries(series map[string]*tsdb.Series) error { + data, err := json.Marshal(series) + if err != nil { + return err + } + + fn := filepath.Join(e.path, SeriesFileExtension+"tmp") + ff, err := os.OpenFile(fn, os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return err + } + _, err = ff.Write(snappy.Encode(nil, data)) + if err != nil { + return err + } + if err := ff.Close(); err != nil { + return err + } + seriesFileName := filepath.Join(e.path, SeriesFileExtension) + + if _, err := 
os.Stat(seriesFileName); !os.IsNotExist(err) { + if err := os.Remove(seriesFileName); err != nil && err != os.ErrNotExist { + return err + } + } + + return os.Rename(fn, seriesFileName) +} + +func (e *Engine) readSeries() (map[string]*tsdb.Series, error) { + series := make(map[string]*tsdb.Series) + + f, err := os.OpenFile(filepath.Join(e.path, SeriesFileExtension), os.O_RDONLY, 0666) + if os.IsNotExist(err) { + return series, nil + } else if err != nil { + return nil, err + } + defer f.Close() + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + + data, err := snappy.Decode(nil, b) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(data, &series); err != nil { + return nil, err + } + + return series, nil +} + +// DecodeAndCombine take an encoded block from a file, decodes it and interleaves the file +// values with the values passed in. nextTime and hasNext refer to if the file +// has future encoded blocks so that this method can know how much of its values can be +// combined and output in the resulting encoded block. +func (e *Engine) DecodeAndCombine(newValues Values, block, buf []byte, nextTime int64, hasFutureBlock bool) (Values, []byte, error) { + values := newValues.DecodeSameTypeBlock(block) + + var remainingValues Values + + if hasFutureBlock { + // take all values that have times less than the future block and update the vals array + pos := sort.Search(len(newValues), func(i int) bool { + return newValues[i].Time().UnixNano() >= nextTime + }) + values = append(values, newValues[:pos]...) + remainingValues = newValues[pos:] + values = values.Deduplicate() + } else { + requireSort := values.MaxTime() >= newValues.MinTime() + values = append(values, newValues...) 
+ if requireSort { + values = values.Deduplicate() + } + } + + if len(values) > e.MaxPointsPerBlock { + remainingValues = values[e.MaxPointsPerBlock:] + values = values[:e.MaxPointsPerBlock] + } + + encoded, err := values.Encode(buf) + if err != nil { + return nil, nil, err + } + return remainingValues, encoded, nil +} + +// removeFileIfCheckpointExists will remove the file if its associated checkpoint fil is there. +// It returns true if the file was removed. This is for recovery of data files on startup +func (e *Engine) removeFileIfCheckpointExists(fileName string) bool { + checkpointName := fmt.Sprintf("%s.%s", fileName, CheckpointExtension) + _, err := os.Stat(checkpointName) + + // if there's no checkpoint, move on + if err != nil { + return false + } + + // there's a checkpoint so we know this file isn't safe so we should remove it + err = os.Remove(fileName) + if err != nil { + panic(fmt.Sprintf("error removing file %s", err.Error())) + } + + err = os.Remove(checkpointName) + if err != nil { + panic(fmt.Sprintf("error removing file %s", err.Error())) + } + + return true +} + +// cleanupMetafile will remove the tmp file if the other file exists, or rename the +// tmp file to be a regular file if the normal file is missing. This is for recovery on +// startup. 
+func (e *Engine) cleanupMetafile(name string) { + fileName := filepath.Join(e.path, name) + tmpName := fileName + "tmp" + + _, err := os.Stat(tmpName) + + // if the tmp file isn't there, we can just exit + if err != nil { + return + } + + _, err = os.Stat(fileName) + + // the regular file is there so we should just remove the tmp file + if err == nil { + err = os.Remove(tmpName) + if err != nil { + panic(fmt.Sprintf("error removing meta file %s: %s", tmpName, err.Error())) + } + } + + // regular file isn't there so have the tmp file take its place + err = os.Rename(tmpName, fileName) + if err != nil { + panic(fmt.Sprintf("error renaming meta file %s: %s", tmpName, err.Error())) + } +} + +// openFileAndCehckpoint will create a checkpoint file, open a new file for +// writing a data index, write the header and return the file +func (e *Engine) openFileAndCheckpoint(fileName string) (*os.File, error) { + checkpointFile := fmt.Sprintf("%s.%s", fileName, CheckpointExtension) + cf, err := os.OpenFile(checkpointFile, os.O_CREATE, 0666) + if err != nil { + return nil, err + } + // _, err = cf.Write(u32tob(magicNumber)) + // if err != nil { + // panic(err) + // } + if err := cf.Close(); err != nil { + return nil, err + } + _, err = os.Stat(checkpointFile) + + f, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return nil, err + } + + // write the header, which is just the magic number + if _, err := f.Write(u32tob(magicNumber)); err != nil { + f.Close() + return nil, err + } + + return f, nil +} + +// removeCheckpoint removes the checkpoint for a new data file that was getting written +func (e *Engine) removeCheckpoint(fileName string) error { + checkpointFile := fmt.Sprintf("%s.%s", fileName, CheckpointExtension) + return os.Remove(checkpointFile) +} + +type dataFile struct { + f *os.File + mu sync.RWMutex + size uint32 + modTime time.Time + mmap []byte +} + +// byte size constants for the data file +const ( + fileHeaderSize = 4 + 
+	seriesCountSize    = 4
+	timeSize           = 8
+	blockHeaderSize    = 12
+	seriesIDSize       = 8
+	seriesPositionSize = 4
+	seriesHeaderSize   = seriesIDSize + seriesPositionSize
+	minTimeOffset      = 20
+	maxTimeOffset      = 12
+)
+
+func NewDataFile(f *os.File) (*dataFile, error) {
+	fInfo, err := f.Stat()
+	if err != nil {
+		return nil, err
+	}
+	mmap, err := syscall.Mmap(int(f.Fd()), 0, int(fInfo.Size()), syscall.PROT_READ, syscall.MAP_SHARED|MAP_POPULATE)
+	if err != nil {
+		return nil, err
+	}
+
+	return &dataFile{
+		f:       f,
+		mmap:    mmap,
+		size:    uint32(fInfo.Size()),
+		modTime: fInfo.ModTime(),
+	}, nil
+}
+
+func (d *dataFile) Close() error {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	return d.close()
+}
+
+func (d *dataFile) Delete() error {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	if err := d.close(); err != nil {
+		return err
+	}
+	err := os.Remove(d.f.Name())
+	if err != nil {
+		return err
+	}
+	d.f = nil
+	return nil
+}
+
+func (d *dataFile) close() error {
+	if d.mmap == nil {
+		return nil
+	}
+	err := syscall.Munmap(d.mmap)
+	if err != nil {
+		return err
+	}
+
+	d.mmap = nil
+	return d.f.Close()
+}
+
+func (d *dataFile) MinTime() int64 {
+	minTimePosition := d.size - minTimeOffset
+	timeBytes := d.mmap[minTimePosition : minTimePosition+timeSize]
+	return int64(btou64(timeBytes))
+}
+
+func (d *dataFile) MaxTime() int64 {
+	maxTimePosition := d.size - maxTimeOffset
+	timeBytes := d.mmap[maxTimePosition : maxTimePosition+timeSize]
+	return int64(btou64(timeBytes))
+}
+
+func (d *dataFile) SeriesCount() uint32 {
+	return btou32(d.mmap[d.size-4:])
+}
+
+func (d *dataFile) IDToPosition() map[uint64]uint32 {
+	count := int(d.SeriesCount())
+	m := make(map[uint64]uint32)
+
+	indexStart := d.size - uint32(count*12+20)
+	for i := 0; i < count; i++ {
+		offset := indexStart + uint32(i*12)
+		id := btou64(d.mmap[offset : offset+8])
+		pos := btou32(d.mmap[offset+8 : offset+12])
+		m[id] = pos
+	}
+
+	return m
+}
+
+func (d *dataFile) indexPosition() uint32 {
+	return d.size -
+		uint32(d.SeriesCount()*12+20)
+}
+
+// StartingPositionForID returns the position in the file of the
+// first block for the given ID. If zero is returned the ID doesn't
+// have any data in this file.
+func (d *dataFile) StartingPositionForID(id uint64) uint32 {
+
+	seriesCount := d.SeriesCount()
+	indexStart := d.indexPosition()
+
+	min := uint32(0)
+	max := uint32(seriesCount)
+
+	for min < max {
+		mid := (max-min)/2 + min
+
+		offset := mid*seriesHeaderSize + indexStart
+		checkID := btou64(d.mmap[offset : offset+8])
+
+		if checkID == id {
+			return btou32(d.mmap[offset+8 : offset+12])
+		} else if checkID < id {
+			min = mid + 1
+		} else {
+			max = mid
+		}
+	}
+
+	return uint32(0)
+}
+
+func (d *dataFile) block(pos uint32) (id uint64, t int64, block []byte) {
+	defer func() {
+		if r := recover(); r != nil {
+			panic(fmt.Sprintf("panic decoding file: %s at position %d for id %d at time %d", d.f.Name(), pos, id, t))
+		}
+	}()
+	if pos < d.indexPosition() {
+		id = btou64(d.mmap[pos : pos+8])
+		length := btou32(d.mmap[pos+8 : pos+12])
+		block = d.mmap[pos+blockHeaderSize : pos+blockHeaderSize+length]
+		t = int64(btou64(d.mmap[pos+blockHeaderSize : pos+blockHeaderSize+8]))
+	}
+	return
+}
+
+type dataFiles []*dataFile
+
+func (a dataFiles) Len() int           { return len(a) }
+func (a dataFiles) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+func (a dataFiles) Less(i, j int) bool { return a[i].MinTime() < a[j].MinTime() }
+
+// u64tob converts a uint64 into an 8-byte slice.
+func u64tob(v uint64) []byte {
+	b := make([]byte, 8)
+	binary.BigEndian.PutUint64(b, v)
+	return b
+}
+
+func btou64(b []byte) uint64 {
+	return binary.BigEndian.Uint64(b)
+}
+
+func u32tob(v uint32) []byte {
+	b := make([]byte, 4)
+	binary.BigEndian.PutUint32(b, v)
+	return b
+}
+
+func btou32(b []byte) uint32 {
+	return binary.BigEndian.Uint32(b)
+}
+
+func hashSeriesField(key string) uint64 {
+	h := fnv.New64a()
+	h.Write([]byte(key))
+	return h.Sum64()
+}
+
+// SeriesFieldKey combines a series key and field name for a unique string to be hashed to a numeric ID
+func SeriesFieldKey(seriesKey, field string) string {
+	return seriesKey + keyFieldSeparator + field
+}
+
+func seriesAndFieldFromCompositeKey(key string) (string, string) {
+	parts := strings.Split(key, keyFieldSeparator)
+	if len(parts) != 0 {
+		return parts[0], strings.Join(parts[1:], keyFieldSeparator)
+	}
+	return parts[0], parts[1]
+}
+
+type uint64slice []uint64
+
+func (a uint64slice) Len() int           { return len(a) }
+func (a uint64slice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+func (a uint64slice) Less(i, j int) bool { return a[i] < a[j] }
diff --git a/tsdb/engine/tsm1/tsm1_test.go b/tsdb/engine/tsm1/tsm1_test.go
new file mode 100644
index 00000000000..dbd353d7ee0
--- /dev/null
+++ b/tsdb/engine/tsm1/tsm1_test.go
@@ -0,0 +1,1379 @@
+package tsm1_test
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io/ioutil"
+	"math"
+	"os"
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/influxdb/influxdb/influxql"
+	"github.com/influxdb/influxdb/models"
+	"github.com/influxdb/influxdb/tsdb"
+	"github.com/influxdb/influxdb/tsdb/engine/tsm1"
+)
+
+func TestEngine_WriteAndReadFloats(t *testing.T) {
+	e := OpenDefaultEngine()
+	defer e.Cleanup()
+
+	p1 := parsePoint("cpu,host=A value=1.1 1000000000")
+	p2 := parsePoint("cpu,host=B value=1.2 1000000000")
+	p3 := parsePoint("cpu,host=A value=2.1 2000000000")
+	p4 := parsePoint("cpu,host=B value=2.2 2000000000")
+
+	if err := e.WritePoints([]models.Point{p1, p2,
p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + fields := []string{"value"} + + verify := func(checkSingleBVal bool) { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("p1 time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + if 1.1 != v { + t.Fatal("p1 data not equal") + } + k, v = c.Next() + if k != p3.UnixNano() { + t.Fatalf("p3 time wrong:\n\texp:%d\n\tgot:%d\n", p3.UnixNano(), k) + } + if 2.1 != v { + t.Fatal("p3 data not equal") + } + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + c = tx.Cursor("cpu,host=B", fields, nil, true) + k, v = c.SeekTo(0) + if k != p2.UnixNano() { + t.Fatalf("p2 time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 1.2 != v { + t.Fatal("p2 data not equal") + } + + if checkSingleBVal { + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + } + } + verify(true) + + if err := e.WritePoints([]models.Point{p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + verify(false) + + tx, _ := e.Begin(false) + c := tx.Cursor("cpu,host=B", fields, nil, true) + k, v := c.SeekTo(0) + if k != p2.UnixNano() { + t.Fatalf("p2 time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 1.2 != v { + t.Fatal("p2 data not equal") + } + k, v = c.Next() + if k != p4.UnixNano() { + t.Fatalf("p2 time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 2.2 != v { + t.Fatal("p2 data not equal") + } + + // verify we can seek + k, v = c.SeekTo(2000000000) + if k != p4.UnixNano() { + t.Fatalf("p2 time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 2.2 != v { + t.Fatal("p2 data not equal") + } + + c = tx.Cursor("cpu,host=A", fields, nil, true) + k, v = c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("p1 time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + if 1.1 != v { + t.Fatal("p1 data not equal") + } 
+ tx.Rollback() + + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + verify(false) +} + +func TestEngine_WriteIndexWithCollision(t *testing.T) { +} + +func TestEngine_WriteIndexQueryAcrossDataFiles(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + e.RotateFileSize = 10 + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=B value=1.2 1000000000") + p3 := parsePoint("cpu,host=A value=2.1 4000000000") + p4 := parsePoint("cpu,host=B value=2.2 4000000000") + + if err := e.WritePoints([]models.Point{p1, p2, p3, p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + p5 := parsePoint("cpu,host=A value=3.1 5000000000") + p6 := parsePoint("cpu,host=B value=3.2 5000000000") + p7 := parsePoint("cpu,host=A value=4.1 3000000000") + p8 := parsePoint("cpu,host=B value=4.2 3000000000") + + if err := e.WritePoints([]models.Point{p5, p6, p7, p8}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if count := e.DataFileCount(); count != 2 { + t.Fatalf("expected 2 data files to exist but got %d", count) + } + + fields := []string{"value"} + + verify := func(series string, points []models.Point, seek int64) { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor(series, fields, nil, true) + + k, v := c.SeekTo(seek) + p := points[0] + val := p.Fields()["value"] + if p.UnixNano() != k || val != v { + t.Fatalf("expected to seek to first point\n\texp: %d %f\n\tgot: %d %f", p.UnixNano(), val, k, v) + } + points = points[1:] + + for _, p := range points { + k, v := c.Next() + val := p.Fields()["value"] + if p.UnixNano() != k || val != v { + t.Fatalf("expected to seek to first point\n\texp: %d %f\n\tgot: %d %f", p.UnixNano(), val, k, v.(float64)) + } + } + } + + fmt.Println("v1") + verify("cpu,host=A", []models.Point{p1, p7, p3, p5}, 0) + 
fmt.Println("v2") + verify("cpu,host=B", []models.Point{p2, p8, p4, p6}, 0) + fmt.Println("v3") + verify("cpu,host=A", []models.Point{p5}, 5000000000) + fmt.Println("v4") + verify("cpu,host=B", []models.Point{p6}, 5000000000) +} + +func TestEngine_WriteOverwritePreviousPoint(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2 1000000000") + p3 := parsePoint("cpu,host=A value=1.3 1000000000") + + if err := e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 1.2 != v { + t.Fatalf("data wrong:\n\texp:%f\n\tgot:%f", 1.2, v.(float64)) + } + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + if err := e.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx2, _ := e.Begin(false) + defer tx2.Rollback() + c = tx2.Cursor("cpu,host=A", fields, nil, true) + k, v = c.SeekTo(0) + if k != p3.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p3.UnixNano(), k) + } + if 1.3 != v { + t.Fatalf("data wrong:\n\texp:%f\n\tgot:%f", 1.3, v.(float64)) + } + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } +} + +func TestEngine_CursorCombinesWALAndIndex(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2 2000000000") + + if err := e.WritePoints([]models.Point{p1}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + e.WAL.SkipCache = false + if err := e.WritePoints([]models.Point{p2}, 
nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + if 1.1 != v { + t.Fatalf("data wrong:\n\texp:%f\n\tgot:%f", 1.1, v.(float64)) + } + k, v = c.Next() + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 1.2 != v { + t.Fatalf("data wrong:\n\texp:%f\n\tgot:%f", 1.2, v.(float64)) + } + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } +} + +func TestEngine_Compaction(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + e.RotateFileSize = 10 + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=B value=1.1 1000000000") + if err := e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + p3 := parsePoint("cpu,host=A value=2.4 4000000000") + p4 := parsePoint("cpu,host=B value=2.4 4000000000") + if err := e.WritePoints([]models.Point{p3, p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + p5 := parsePoint("cpu,host=A value=1.5 5000000000") + p6 := parsePoint("cpu,host=B value=2.5 5000000000") + if err := e.WritePoints([]models.Point{p5, p6}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + p7 := parsePoint("cpu,host=A value=1.5 6000000000") + p8 := parsePoint("cpu,host=B value=2.5 6000000000") + if err := e.WritePoints([]models.Point{p7, p8}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if count := e.DataFileCount(); count != 4 { + t.Fatalf("expected 3 data files to exist but got %d", count) + } + + fields := []string{"value"} + + e.CompactionAge = time.Duration(0) + + if err := e.Compact(true); err != nil { + t.Fatalf("error 
compacting: %s", err.Error()) + } + + if count := e.DataFileCount(); count != 1 { + t.Fatalf("expected compaction to reduce data file count to 1 but got %d", count) + } + + verify := func(series string, points []models.Point, seek int64) { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor(series, fields, nil, true) + + k, v := c.SeekTo(seek) + p := points[0] + val := p.Fields()["value"] + if p.UnixNano() != k || val != v { + t.Fatalf("expected to seek to first point\n\texp: %d %f\n\tgot: %d %f", p.UnixNano(), val, k, v) + } + points = points[1:] + + for _, p := range points { + k, v := c.Next() + val := p.Fields()["value"] + if p.UnixNano() != k || val != v { + t.Fatalf("expected to seek to first point\n\texp: %d %f\n\tgot: %d %f", p.UnixNano(), val, k, v.(float64)) + } + } + } + + verify("cpu,host=A", []models.Point{p1, p3, p5, p7}, 0) + verify("cpu,host=B", []models.Point{p2, p4, p6, p8}, 0) + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + verify("cpu,host=A", []models.Point{p1, p3, p5, p7}, 0) + verify("cpu,host=B", []models.Point{p2, p4, p6, p8}, 0) +} + +// Ensure that if two keys have the same fnv64-a id, we handle it +func TestEngine_KeyCollisionsAreHandled(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + // make sure two of these keys collide + e.HashSeriesField = func(key string) uint64 { + return 1 + } + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=B value=1.2 1000000000") + p3 := parsePoint("cpu,host=C value=1.3 1000000000") + + if err := e.WritePoints([]models.Point{p1, p2, p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + verify := func(series string, points []models.Point, seek int64) { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor(series, fields, nil, true) + + k, v := 
c.SeekTo(seek) + p := points[0] + val := p.Fields()["value"] + if p.UnixNano() != k || val != v { + t.Fatalf("expected to seek to first point\n\texp: %d %f\n\tgot: %d %f", p.UnixNano(), val, k, v) + } + points = points[1:] + + for _, p := range points { + k, v := c.Next() + val := p.Fields()["value"] + if p.UnixNano() != k || val != v { + t.Fatalf("expected to seek to first point\n\texp: %d %f\n\tgot: %d %f", p.UnixNano(), val, k, v.(float64)) + } + } + } + + verify("cpu,host=A", []models.Point{p1}, 0) + verify("cpu,host=B", []models.Point{p2}, 0) + verify("cpu,host=C", []models.Point{p3}, 0) + + p4 := parsePoint("cpu,host=A value=2.1 2000000000") + p5 := parsePoint("cpu,host=B value=2.2 2000000000") + p6 := parsePoint("cpu,host=C value=2.3 2000000000") + + if err := e.WritePoints([]models.Point{p4, p5, p6}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + verify("cpu,host=A", []models.Point{p1, p4}, 0) + verify("cpu,host=B", []models.Point{p2, p5}, 0) + verify("cpu,host=C", []models.Point{p3, p6}, 0) + + // verify collisions are handled after closing and reopening + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + verify("cpu,host=A", []models.Point{p1, p4}, 0) + verify("cpu,host=B", []models.Point{p2, p5}, 0) + verify("cpu,host=C", []models.Point{p3, p6}, 0) + + p7 := parsePoint("cpu,host=A value=3.1 3000000000") + p8 := parsePoint("cpu,host=B value=3.2 3000000000") + p9 := parsePoint("cpu,host=C value=3.3 3000000000") + + if err := e.WritePoints([]models.Point{p7, p8, p9}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + verify("cpu,host=A", []models.Point{p1, p4, p7}, 0) + verify("cpu,host=B", []models.Point{p2, p5, p8}, 0) + verify("cpu,host=C", []models.Point{p3, p6, p9}, 0) +} + +func TestEngine_SupportMultipleFields(t *testing.T) { + e := OpenDefaultEngine() + 
defer e.Cleanup() + + fields := []string{"value", "foo"} + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2,foo=2.2 2000000000") + + if err := e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p1.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.1}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p2.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.2, "foo": 2.2}) { + t.Fatalf("value wrong: %v", v) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + // verify we can update a field and it's still all good + p11 := parsePoint("cpu,host=A foo=2.1 1000000000") + if err := e.WritePoints([]models.Point{p11}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx2, _ := e.Begin(false) + defer tx2.Rollback() + c = tx2.Cursor("cpu,host=A", fields, nil, true) + k, v = c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p1.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.1, "foo": 2.1}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p2.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.2, "foo": 2.2}) { + t.Fatalf("value wrong: %v", v) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + // verify it's all good with the wal in the picture + e.WAL.SkipCache = false + + p3 := parsePoint("cpu,host=A value=1.3 3000000000") + p4 := parsePoint("cpu,host=A value=1.4,foo=2.4 
4000000000") + if err := e.WritePoints([]models.Point{p3, p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx3, _ := e.Begin(false) + defer tx3.Rollback() + c = tx3.Cursor("cpu,host=A", fields, nil, true) + k, v = c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p1.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.1, "foo": 2.1}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p2.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.2, "foo": 2.2}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p3.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p3.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.3}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p4.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p2.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.4, "foo": 2.4}) { + t.Fatalf("value wrong: %v", v) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + p33 := parsePoint("cpu,host=A foo=2.3 3000000000") + if err := e.WritePoints([]models.Point{p33}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx4, _ := e.Begin(false) + defer tx4.Rollback() + c = tx4.Cursor("cpu,host=A", fields, nil, true) + k, v = c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p1.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.1, "foo": 2.1}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p2.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.2, "foo": 2.2}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if 
k != p3.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p3.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.3, "foo": 2.3}) { + t.Fatalf("value wrong: %v", v) + } + k, v = c.Next() + if k != p4.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p2.UnixNano(), k) + } + if !reflect.DeepEqual(v, map[string]interface{}{"value": 1.4, "foo": 2.4}) { + t.Fatalf("value wrong: %v", v) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + // and ensure we can grab one of the fields + c = tx4.Cursor("cpu,host=A", []string{"value"}, nil, true) + k, v = c.SeekTo(4000000000) + if k != p4.UnixNano() { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", p4.UnixNano(), k) + } + if v != 1.4 { + t.Fatalf("value wrong: %v", v) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } +} + +func TestEngine_WriteManyPointsToSingleSeries(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + var points []models.Point + for i := 1; i <= 10000; i++ { + points = append(points, parsePoint(fmt.Sprintf("cpu,host=A value=%d %d000000000", i, i))) + if i%500 == 0 { + if err := e.WritePoints(points, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + points = nil + } + } + + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + for i := 2; i <= 10000; i++ { + k, v = c.Next() + if k != int64(i)*1000000000 { + t.Fatalf("time wrong:\n\texp: %d\n\tgot: %d", i*1000000000, k) + } + if v != float64(i) { + t.Fatalf("value wrong:\n\texp:%v\n\tgot:%v", float64(i), v) + } + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } +} + +func TestEngine_WritePointsInMultipleRequestsWithSameTime(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + e.WAL.SkipCache = false + + if err := 
e.WritePoints([]models.Point{parsePoint("foo value=1 0")}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{parsePoint("foo value=2 0")}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{parsePoint("foo value=3 0")}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + verify := func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("foo", fields, nil, true) + k, v := c.SeekTo(0) + if k != 0 { + t.Fatalf("expected 0 time but got %d", k) + } + if v != float64(3) { + t.Fatalf("expected 3 for value but got %f", v.(float64)) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + } + + verify() + + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + verify() +} + +func TestEngine_CursorDescendingOrder(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + p1 := parsePoint("foo value=1 1") + p2 := parsePoint("foo value=2 2") + + e.WAL.SkipCache = false + + if err := e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + verify := func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("foo", fields, nil, false) + fmt.Println("seek") + k, v := c.SeekTo(5000000) + if k != 2 { + t.Fatalf("expected 2 time but got %d", k) + } + if v != float64(2) { + t.Fatalf("expected 2 for value but got %f", v.(float64)) + } + fmt.Println("next1") + k, v = c.Next() + if k != 1 { + t.Fatalf("expected 1 time but got %d", k) + } + fmt.Println("next2") + if v != float64(1) { + t.Fatalf("expected 1 for value but got %f", v.(float64)) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF", k) + } + } + fmt.Println("verify 
1") + verify() + + if err := e.WAL.Flush(); err != nil { + t.Fatalf("error flushing WAL %s", err.Error()) + } + + fmt.Println("verify 2") + verify() + + p3 := parsePoint("foo value=3 3") + + if err := e.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("foo", fields, nil, false) + k, v := c.SeekTo(234232) + if k != 3 { + t.Fatalf("expected 3 time but got %d", k) + } + if v != float64(3) { + t.Fatalf("expected 3 for value but got %f", v.(float64)) + } + k, _ = c.Next() + if k != 2 { + t.Fatalf("expected 2 time but got %d", k) + } + }() +} + +func TestEngine_CompactWithSeriesInOneFile(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + e.RotateFileSize = 10 + e.MaxPointsPerBlock = 1 + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=B value=1.2 2000000000") + p3 := parsePoint("cpu,host=A value=1.3 3000000000") + + if err := e.WritePoints([]models.Point{p1}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if count := e.DataFileCount(); count != 3 { + t.Fatalf("expected 3 data files but got %d", count) + } + + verify := func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != 1000000000 { + t.Fatalf("expected time 1000000000 but got %d", k) + } + if v != 1.1 { + t.Fatalf("expected value 1.1 but got %f", v.(float64)) + } + k, v = c.Next() + if k != 3000000000 { + t.Fatalf("expected time 3000000000 but got %d", k) + } + c = tx.Cursor("cpu,host=B", fields, nil, true) + k, v = 
c.SeekTo(0) + if k != 2000000000 { + t.Fatalf("expected time 2000000000 but got %d", k) + } + if v != 1.2 { + t.Fatalf("expected value 1.2 but got %f", v.(float64)) + } + } + + fmt.Println("verify 1") + verify() + + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + fmt.Println("verify 2") + verify() + + p4 := parsePoint("cpu,host=A value=1.4 4000000000") + if err := e.WritePoints([]models.Point{p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + tx1, _ := e.Begin(false) + defer tx1.Rollback() + c := tx1.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != 1000000000 { + t.Fatalf("expected time 1000000000 but got %d", k) + } + if v != 1.1 { + t.Fatalf("expected value 1.1 but got %f", v.(float64)) + } + k, v = c.Next() + if k != 3000000000 { + t.Fatalf("expected time 3000000000 but got %d", k) + } + k, v = c.Next() + if k != 4000000000 { + t.Fatalf("expected time 3000000000 but got %d", k) + } +} + +// Ensure that compactions that happen where blocks from old data files +// skip decoding and just get copied over to the new data file works. 
+func TestEngine_CompactionWithCopiedBlocks(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + e.RotateFileSize = 10 + e.MaxPointsPerBlock = 1 + e.RotateBlockSize = 10 + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2 2000000000") + p3 := parsePoint("cpu,host=A value=1.3 3000000000") + + if err := e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + verify := func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, _ := c.SeekTo(0) + if k != 1000000000 { + t.Fatalf("expected time 1000000000 but got %d", k) + } + k, _ = c.Next() + if k != 2000000000 { + t.Fatalf("expected time 2000000000 but got %d", k) + } + k, _ = c.Next() + if k != 3000000000 { + t.Fatalf("expected time 3000000000 but got %d", k) + } + } + + verify() + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + fmt.Println("verify 2") + verify() + + p4 := parsePoint("cpu,host=B value=1.4 4000000000") + if err := e.WritePoints([]models.Point{p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + fmt.Println("verify 3") + verify() + + p5 := parsePoint("cpu,host=A value=1.5 5000000000") + p6 := parsePoint("cpu,host=A value=1.6 6000000000") + p7 := parsePoint("cpu,host=B value=2.1 7000000000") + if err := e.WritePoints([]models.Point{p5, p6, p7}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + p8 := parsePoint("cpu,host=A value=1.5 7000000000") + p9 := parsePoint("cpu,host=A value=1.6 8000000000") + p10 := parsePoint("cpu,host=B value=2.1 8000000000") + if err 
:= e.WritePoints([]models.Point{p8, p9, p10}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + verify() + +} + +func TestEngine_RewritingOldBlocks(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + e.MaxPointsPerBlock = 2 + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2 2000000000") + p3 := parsePoint("cpu,host=A value=1.3 3000000000") + p4 := parsePoint("cpu,host=A value=1.5 1500000000") + + if err := e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, _ := c.SeekTo(0) + if k != 1000000000 { + t.Fatalf("expected time 1000000000 but got %d", k) + } + k, _ = c.Next() + if k != 1500000000 { + t.Fatalf("expected time 1500000000 but got %d", k) + } + k, _ = c.Next() + if k != 2000000000 { + t.Fatalf("expected time 2000000000 but got %d", k) + } + k, _ = c.Next() + if k != 3000000000 { + t.Fatalf("expected time 3000000000 but got %d", k) + } +} + +func TestEngine_WriteIntoCompactedFile(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + e.MaxPointsPerBlock = 3 + e.RotateFileSize = 10 + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2 2000000000") + p3 := parsePoint("cpu,host=A value=1.3 3000000000") + p4 := parsePoint("cpu,host=A value=1.5 4000000000") + p5 := parsePoint("cpu,host=A value=1.6 2500000000") + + if err := 
e.WritePoints([]models.Point{p1, p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + if err := e.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + + if err := e.WritePoints([]models.Point{p4}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if err := e.Compact(true); err != nil { + t.Fatalf("error compacting: %s", err.Error()) + } + + if err := e.WritePoints([]models.Point{p5}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + if count := e.DataFileCount(); count != 1 { + t.Fatalf("execpted 1 data file but got %d", count) + } + + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, _ := c.SeekTo(0) + if k != 1000000000 { + t.Fatalf("wrong time: %d", k) + } + k, _ = c.Next() + if k != 2000000000 { + t.Fatalf("wrong time: %d", k) + } + k, _ = c.Next() + if k != 2500000000 { + t.Fatalf("wrong time: %d", k) + } + k, _ = c.Next() + if k != 3000000000 { + t.Fatalf("wrong time: %d", k) + } + k, _ = c.Next() + if k != 4000000000 { + t.Fatalf("wrong time: %d", k) + } +} + +func TestEngine_DuplicatePointsInWalAndIndex(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=A value=1.2 1000000000") + if err := e.WritePoints([]models.Point{p1}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + e.WAL.SkipCache = false + if err := e.WritePoints([]models.Point{p2}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, v := c.SeekTo(0) + if k != 1000000000 { + t.Fatalf("wrong time: 
%d", k) + } + if v != 1.2 { + t.Fatalf("wrong value: %f", v.(float64)) + } + k, _ = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF", k) + } +} + +func TestEngine_Deletes(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + // Create metadata. + mf := &tsdb.MeasurementFields{Fields: make(map[string]*tsdb.Field)} + mf.CreateFieldIfNotExists("value", influxql.Float, false) + atag := map[string]string{"host": "A"} + btag := map[string]string{"host": "B"} + seriesToCreate := []*tsdb.SeriesCreate{ + {Series: tsdb.NewSeries(string(models.MakeKey([]byte("cpu"), atag)), atag)}, + {Series: tsdb.NewSeries(string(models.MakeKey([]byte("cpu"), btag)), btag)}, + } + + p1 := parsePoint("cpu,host=A value=1.1 1000000001") + p2 := parsePoint("cpu,host=A value=1.2 2000000001") + p3 := parsePoint("cpu,host=B value=2.1 1000000000") + p4 := parsePoint("cpu,host=B value=2.1 2000000000") + + e.SkipCompaction = true + e.WAL.SkipCache = false + + if err := e.WritePoints([]models.Point{p1, p3}, map[string]*tsdb.MeasurementFields{"cpu": mf}, seriesToCreate); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, _ := c.SeekTo(0) + if k != p1.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + }() + + if err := e.DeleteSeries([]string{"cpu,host=A"}); err != nil { + t.Fatalf("failed to delete series: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=B", fields, nil, true) + k, _ := c.SeekTo(0) + if k != p3.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + c = tx.Cursor("cpu,host=A", fields, nil, true) + k, _ = c.SeekTo(0) + if k != tsdb.EOF { + t.Fatal("expected EOF", k) + } + }() + + if err := e.WritePoints([]models.Point{p2, p4}, nil, nil); err != nil { + t.Fatalf("failed to write 
points: %s", err.Error()) + } + + if err := e.WAL.Flush(); err != nil { + t.Fatalf("error flushing wal: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=A", fields, nil, true) + k, _ := c.SeekTo(0) + if k != p2.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + }() + + if err := e.DeleteSeries([]string{"cpu,host=A"}); err != nil { + t.Fatalf("failed to delete series: %s", err.Error()) + } + + // we already know the delete on the wal works. open and close so + // the wal flushes to the index. To verify that the delete gets + // persisted and will go all the way through the index + + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + verify := func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=B", fields, nil, true) + k, _ := c.SeekTo(0) + if k != p3.UnixNano() { + t.Fatalf("time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + c = tx.Cursor("cpu,host=A", fields, nil, true) + k, _ = c.SeekTo(0) + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + } + + fmt.Println("verify 1") + verify() + + // open and close to verify thd delete was persisted + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + fmt.Println("verify 2") + verify() + + if err := e.DeleteSeries([]string{"cpu,host=B"}); err != nil { + t.Fatalf("failed to delete series: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=B", fields, nil, true) + k, _ := c.SeekTo(0) + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + }() + + if err := e.WAL.Flush(); err != nil { + t.Fatalf("error flushing: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := 
tx.Cursor("cpu,host=B", fields, nil, true) + k, _ := c.SeekTo(0) + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + }() + + // open and close to verify thd delete was persisted + if err := e.Close(); err != nil { + t.Fatalf("error closing: %s", err.Error()) + } + if err := e.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c := tx.Cursor("cpu,host=B", fields, nil, true) + k, _ := c.SeekTo(0) + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + }() +} + +func TestEngine_IndexGoodAfterFlush(t *testing.T) { + e := OpenDefaultEngine() + defer e.Cleanup() + + fields := []string{"value"} + + p1 := parsePoint("test,tag=a value=2.5 1443916800000000000") + p2 := parsePoint("test value=3.5 1443916810000000000") + p3 := parsePoint("test,tag=b value=6.5 1443916860000000000") + p4 := parsePoint("test value=8.5 1443916861000000000") + + e.SkipCompaction = true + e.WAL.SkipCache = false + + for _, p := range []models.Point{p1, p2, p3, p4} { + if err := e.WritePoints([]models.Point{p}, nil, nil); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + } + + verify := func() { + tx, _ := e.Begin(false) + defer tx.Rollback() + c1 := tx.Cursor("test", fields, nil, true) + c2 := tx.Cursor("test,tag=a", fields, nil, true) + c3 := tx.Cursor("test,tag=b", fields, nil, true) + k, v := c1.SeekTo(1443916800000000001) + if k != p2.UnixNano() { + t.Fatalf("time wrong: %d", k) + } + if v != 3.5 { + t.Fatalf("value wrong: %f", v.(float64)) + } + k, v = c1.Next() + if k != p4.UnixNano() { + t.Fatalf("time wrong: %d", k) + } + if v != 8.5 { + t.Fatalf("value wrong: %f", v.(float64)) + } + if k, _ := c1.Next(); k != tsdb.EOF { + t.Fatalf("expected EOF: %d", k) + } + k, _ = c2.SeekTo(1443916800000000001) + if k != tsdb.EOF { + t.Fatalf("time wrong: %d", k) + } + k, v = c3.SeekTo(1443916800000000001) + if k != p3.UnixNano() { + t.Fatalf("time wrong: %d", k) + } + if v != 6.5 { + t.Fatalf("value 
wrong: %f", v.(float64)) + } + if k, _ := c3.Next(); k != tsdb.EOF { + t.Fatalf("expected EOF: %d", k) + } + } + + fmt.Println("verify1") + verify() + fmt.Println("flush") + if err := e.WAL.Flush(); err != nil { + t.Fatalf("error flushing: %s", err.Error()) + } + fmt.Println("verify2") + verify() +} + +// Engine represents a test wrapper for tsm1.Engine. +type Engine struct { + *tsm1.Engine +} + +// NewEngine returns a new instance of Engine. +func NewEngine(opt tsdb.EngineOptions) *Engine { + dir, err := ioutil.TempDir("", "tsm1-test") + if err != nil { + panic("couldn't get temp dir") + } + + // Create test wrapper and attach mocks. + e := &Engine{ + Engine: tsm1.NewEngine(dir, dir, opt).(*tsm1.Engine), + } + + return e +} + +// OpenEngine returns an opened instance of Engine. Panic on error. +func OpenEngine(opt tsdb.EngineOptions) *Engine { + e := NewEngine(opt) + if err := e.Open(); err != nil { + panic(err) + } + e.WAL.SkipCache = true + e.SkipCompaction = true + return e +} + +// OpenDefaultEngine returns an open Engine with default options. +func OpenDefaultEngine() *Engine { return OpenEngine(tsdb.NewEngineOptions()) } + +// Cleanup closes the engine and removes all data. 
+func (e *Engine) Cleanup() error { + e.Engine.Close() + os.RemoveAll(e.Path()) + return nil +} + +func parsePoints(buf string) []models.Point { + points, err := models.ParsePointsString(buf) + if err != nil { + panic(fmt.Sprintf("couldn't parse points: %s", err.Error())) + } + return points +} + +func parsePoint(buf string) models.Point { + return parsePoints(buf)[0] +} + +func inttob(v int) []byte { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, uint64(v)) + return b +} + +func btou64(b []byte) uint64 { + return binary.BigEndian.Uint64(b) +} + +func u64tob(v uint64) []byte { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, v) + return b +} + +func btof64(b []byte) float64 { + return math.Float64frombits(binary.BigEndian.Uint64(b)) +} diff --git a/tsdb/engine/tsm1/tx.go b/tsdb/engine/tsm1/tx.go new file mode 100644 index 00000000000..54653c28728 --- /dev/null +++ b/tsdb/engine/tsm1/tx.go @@ -0,0 +1,69 @@ +package tsm1 + +import ( + "io" + + "github.com/influxdb/influxdb/tsdb" +) + +type tx struct { + files dataFiles + engine *Engine +} + +// TODO: handle multiple fields and descending +func (t *tx) Cursor(series string, fields []string, dec *tsdb.FieldCodec, ascending bool) tsdb.Cursor { + t.engine.filesLock.RLock() + defer t.engine.filesLock.RUnlock() + + // don't add the overhead of the multifield cursor if we only have one field + if len(fields) == 1 { + id := t.engine.keyAndFieldToID(series, fields[0]) + _, isDeleted := t.engine.deletes[id] + + var indexCursor tsdb.Cursor + if isDeleted { + indexCursor = &emptyCursor{ascending: ascending} + } else { + indexCursor = newCursor(id, t.files, ascending) + } + wc := t.engine.WAL.Cursor(series, fields, dec, ascending) + return NewCombinedEngineCursor(wc, indexCursor, ascending) + } + + // multiple fields. 
use just the MultiFieldCursor, which also handles time collisions + // so we don't need to use the combined cursor + cursors := make([]tsdb.Cursor, 0) + cursorFields := make([]string, 0) + for _, field := range fields { + id := t.engine.keyAndFieldToID(series, field) + _, isDeleted := t.engine.deletes[id] + + var indexCursor tsdb.Cursor + if isDeleted { + indexCursor = &emptyCursor{ascending: ascending} + } else { + indexCursor = newCursor(id, t.files, ascending) + } + wc := t.engine.WAL.Cursor(series, []string{field}, dec, ascending) + // double up the fields since there's one for the wal and one for the index + cursorFields = append(cursorFields, field, field) + cursors = append(cursors, indexCursor, wc) + } + + return NewMultiFieldCursor(cursorFields, cursors, ascending) +} + +func (t *tx) Rollback() error { + t.engine.queryLock.RUnlock() + for _, f := range t.files { + f.mu.RUnlock() + } + + return nil +} + +// TODO: refactor the Tx interface to not have Size, Commit, or WriteTo since they're not used +func (t *tx) Size() int64 { panic("not implemented") } +func (t *tx) Commit() error { panic("not implemented") } +func (t *tx) WriteTo(w io.Writer) (n int64, err error) { panic("not implemented") } diff --git a/tsdb/engine/tsm1/wal.go b/tsdb/engine/tsm1/wal.go new file mode 100644 index 00000000000..4f6607d02ae --- /dev/null +++ b/tsdb/engine/tsm1/wal.go @@ -0,0 +1,792 @@ +package tsm1 + +import ( + "encoding/json" + "fmt" + "io" + "log" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/influxdb/influxdb/models" + "github.com/influxdb/influxdb/tsdb" + + "github.com/golang/snappy" +) + +const ( + // DefaultSegmentSize of 2MB is the size at which segment files will be rolled over + DefaultSegmentSize = 2 * 1024 * 1024 + + // FileExtension is the file extension we expect for wal segments + WALFileExtension = "wal" + + WALFilePrefix = "_" + + writeBufLen = 32 << 10 // 32kb +) + +// flushType indiciates why a flush and 
compaction are being run so the partition can +// do the appropriate type of compaction +type flushType int + +const ( + // noFlush indicates that no flush or compaction are necesssary at this time + noFlush flushType = iota + // memoryFlush indicates that we should look for the series using the most + // memory to flush out and compact all others + memoryFlush + // idleFlush indicates that we should flush all series in the parition, + // delete all segment files and hold off on opening a new one + idleFlush + // deleteFlush indicates that we're flushing because series need to be removed from the WAL + deleteFlush + // startupFlush indicates that we're flushing because the database is starting up + startupFlush +) + +// walEntry is a byte written to a wal segment file that indicates what the following compressed block contains +type walEntryType byte + +const ( + pointsEntry walEntryType = 0x01 + fieldsEntry walEntryType = 0x02 + seriesEntry walEntryType = 0x03 + deleteEntry walEntryType = 0x04 +) + +type Log struct { + path string + + flushCheckTimer *time.Timer // check this often to see if a background flush should happen + flushCheckInterval time.Duration + + // write variables + writeLock sync.Mutex + currentSegmentID int + currentSegmentFile *os.File + currentSegmentSize int + + // cache and flush variables + cacheLock sync.RWMutex + lastWriteTime time.Time + flushRunning bool + cache map[string]Values + cacheDirtySort map[string]bool // this map should be small, only for dirty vals + flushCache map[string]Values // temporary map while flushing + memorySize int + measurementFieldsCache map[string]*tsdb.MeasurementFields + seriesToCreateCache []*tsdb.SeriesCreate + + // LogOutput is the writer used by the logger. + LogOutput io.Writer + logger *log.Logger + + // FlushColdInterval is the period of time after which a partition will do a + // full flush and compaction if it has been cold for writes. 
+ FlushColdInterval time.Duration + + // SegmentSize is the file size at which a segment file will be rotated + SegmentSize int + + // FlushMemorySizeThreshold specifies when the log should be forced to be flushed + FlushMemorySizeThreshold int + + // MaxMemorySizeThreshold specifies the limit at which writes to the WAL should be rejected + MaxMemorySizeThreshold int + + // Index is the database series will be flushed to + Index IndexWriter + + // LoggingEnabled specifies if detailed logs should be output + LoggingEnabled bool + + // SkipCache specifies if the wal should immediately write to the index instead of + // caching data in memory. False by default so we buffer in memory before flushing to index. + SkipCache bool + + // SkipDurability specifies if the wal should not write the wal entries to disk. + // False by default which means all writes are durable even when cached before flushing to index. + SkipDurability bool +} + +// IndexWriter is an interface for the indexed database the WAL flushes data to +type IndexWriter interface { + Write(valuesByKey map[string]Values, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error + MarkDeletes(keys []string) + MarkMeasurementDelete(name string) +} + +func NewLog(path string) *Log { + return &Log{ + path: path, + + // these options should be overriden by any options in the config + LogOutput: os.Stderr, + FlushColdInterval: tsdb.DefaultFlushColdInterval, + SegmentSize: DefaultSegmentSize, + FlushMemorySizeThreshold: tsdb.DefaultFlushMemorySizeThreshold, + MaxMemorySizeThreshold: tsdb.DefaultMaxMemorySizeThreshold, + logger: log.New(os.Stderr, "[tsm1wal] ", log.LstdFlags), + } +} + +// Open opens and initializes the Log. 
Will recover from previous unclosed shutdowns +func (l *Log) Open() error { + + if l.LoggingEnabled { + l.logger.Printf("tsm1 WAL starting with %d flush memory size threshold and %d max memory size threshold\n", l.FlushMemorySizeThreshold, l.MaxMemorySizeThreshold) + l.logger.Printf("tsm1 WAL writing to %s\n", l.path) + } + if err := os.MkdirAll(l.path, 0777); err != nil { + return err + } + + l.cache = make(map[string]Values) + l.cacheDirtySort = make(map[string]bool) + l.measurementFieldsCache = make(map[string]*tsdb.MeasurementFields) + + // flush out any WAL entries that are there from before + if err := l.readAndFlushWAL(); err != nil { + return err + } + + return nil +} + +// Cursor will return a cursor object to Seek and iterate with Next for the WAL cache for the given. +// This should only ever be called by the engine cursor method, which will always give it +// exactly one field. +func (l *Log) Cursor(series string, fields []string, dec *tsdb.FieldCodec, ascending bool) tsdb.Cursor { + l.cacheLock.RLock() + defer l.cacheLock.RUnlock() + + if len(fields) != 1 { + panic("wal cursor should only ever be called with 1 field") + } + ck := SeriesFieldKey(series, fields[0]) + values := l.cache[ck] + + // if we're in the middle of a flush, combine the previous cache + // with this one for the cursor + if l.flushCache != nil { + if fc, ok := l.flushCache[ck]; ok { + c := make([]Value, len(fc), len(fc)+len(values)) + copy(c, fc) + c = append(c, values...) 
+ + return newWALCursor(Values(c).Deduplicate(), ascending) + } + } + + if l.cacheDirtySort[ck] { + values = Values(values).Deduplicate() + } + + // build a copy so writes afterwards don't change the result set + a := make([]Value, len(values)) + copy(a, values) + return newWALCursor(a, ascending) +} + +func (l *Log) WritePoints(points []models.Point, fields map[string]*tsdb.MeasurementFields, series []*tsdb.SeriesCreate) error { + // add everything to the cache, or return an error if we've hit our max memory + if addedToCache := l.addToCache(points, fields, series, true); !addedToCache { + return fmt.Errorf("WAL backed up flushing to index, hit max memory") + } + + // make the write durable if specified + if !l.SkipDurability { + // write the points + pointStrings := make([]string, len(points)) + for i, p := range points { + pointStrings[i] = p.String() + } + data := strings.Join(pointStrings, "\n") + compressed := snappy.Encode(nil, []byte(data)) + + if err := l.writeToLog(pointsEntry, compressed); err != nil { + return err + } + + // write the new fields + if len(fields) > 0 { + data, err := json.Marshal(fields) + if err != nil { + return err + } + compressed = snappy.Encode(compressed, data) + if err := l.writeToLog(fieldsEntry, compressed); err != nil { + return err + } + } + + // write the new series + if len(series) > 0 { + data, err := json.Marshal(series) + if err != nil { + return err + } + compressed = snappy.Encode(compressed, data) + if err := l.writeToLog(seriesEntry, compressed); err != nil { + return err + } + } + } + + // usually skipping the cache is only for testing purposes and this was the easiest + // way to represent the logic (to cache and then immediately flush) + if l.SkipCache { + l.flush(idleFlush) + } + + return nil +} + +// addToCache will add the points, measurements, and fields to the cache and return true if successful. They will be queryable +// immediately after return and will be flushed at the next flush cycle. 
Before adding to the cache we check if we're over the +// max memory threshold. If we are we request a flush in a new goroutine and return false, indicating we didn't add the values +// to the cache and that writes should return a failure. +func (l *Log) addToCache(points []models.Point, fields map[string]*tsdb.MeasurementFields, series []*tsdb.SeriesCreate, checkMemory bool) bool { + l.cacheLock.Lock() + defer l.cacheLock.Unlock() + + // if we should check memory and we're over the threshold, mark a flush as running and kick one off in a goroutine + if checkMemory && l.memorySize > l.FlushMemorySizeThreshold { + if !l.flushRunning { + l.flushRunning = true + go l.flush(memoryFlush) + } + if l.memorySize > l.MaxMemorySizeThreshold { + return false + } + } + + for _, p := range points { + for name, value := range p.Fields() { + k := SeriesFieldKey(string(p.Key()), name) + v := NewValue(p.Time(), value) + cacheValues := l.cache[k] + + // only mark it as dirty if it isn't already + if _, ok := l.cacheDirtySort[k]; !ok && len(cacheValues) > 0 { + dirty := cacheValues[len(cacheValues)-1].Time().UnixNano() >= v.Time().UnixNano() + if dirty { + l.cacheDirtySort[k] = true + } + } + l.memorySize += v.Size() + l.cache[k] = append(cacheValues, v) + } + } + + for k, v := range fields { + l.measurementFieldsCache[k] = v + } + l.seriesToCreateCache = append(l.seriesToCreateCache, series...) + l.lastWriteTime = time.Now() + + return true +} + +func (l *Log) LastWriteTime() time.Time { + l.cacheLock.RLock() + defer l.cacheLock.RUnlock() + return l.lastWriteTime +} + +// readAndFlushWAL is called on open and will read the segment files in, flushing whenever +// the memory gets over the limit. 
Once all files have been read it will flush and remove the files +func (l *Log) readAndFlushWAL() error { + files, err := l.segmentFileNames() + if err != nil { + return err + } + + // read all the segment files and cache them, flushing along the way if we + // hit memory limits + for _, fn := range files { + if err := l.readFileToCache(fn); err != nil { + return err + } + + if l.memorySize > l.MaxMemorySizeThreshold { + if err := l.flush(memoryFlush); err != nil { + return err + } + } + } + + // now flush and remove all the old files + if err := l.flush(startupFlush); err != nil { + return err + } + + return nil +} + +func (l *Log) readFileToCache(fileName string) error { + f, err := os.OpenFile(fileName, os.O_RDONLY, 0666) + if err != nil { + return err + } + defer f.Close() + + buf := make([]byte, writeBufLen) + data := make([]byte, writeBufLen) + for { + // read the type and the length of the entry + _, err := io.ReadFull(f, buf[0:5]) + if err == io.EOF { + return nil + } else if err != nil { + l.logger.Printf("error reading segment file %s: %s", fileName, err.Error()) + return err + } + entryType := buf[0] + length := btou32(buf[1:5]) + + // read the compressed block and decompress it + if int(length) > len(buf) { + buf = make([]byte, length) + } + _, err = io.ReadFull(f, buf[0:length]) + if err == io.EOF || err == io.ErrUnexpectedEOF { + l.logger.Printf("hit end of file while reading compressed wal entry from %s", fileName) + return nil + } else if err != nil { + return err + } + data, err = snappy.Decode(data, buf[0:length]) + if err != nil { + l.logger.Printf("error decoding compressed entry from %s: %s", fileName, err.Error()) + return nil + } + + // and marshal it and send it to the cache + switch walEntryType(entryType) { + case pointsEntry: + points, err := models.ParsePoints(data) + if err != nil { + return err + } + l.addToCache(points, nil, nil, false) + case fieldsEntry: + fields := make(map[string]*tsdb.MeasurementFields) + if err := 
json.Unmarshal(data, &fields); err != nil { + return err + } + l.addToCache(nil, fields, nil, false) + case seriesEntry: + series := make([]*tsdb.SeriesCreate, 0) + if err := json.Unmarshal(data, &series); err != nil { + return err + } + l.addToCache(nil, nil, series, false) + case deleteEntry: + d := &deleteData{} + if err := json.Unmarshal(data, &d); err != nil { + return err + } + l.Index.MarkDeletes(d.Keys) + l.Index.MarkMeasurementDelete(d.MeasurementName) + l.deleteKeysFromCache(d.Keys) + if d.MeasurementName != "" { + l.deleteMeasurementFromCache(d.MeasurementName) + } + } + } +} + +func (l *Log) writeToLog(writeType walEntryType, data []byte) error { + l.writeLock.Lock() + defer l.writeLock.Unlock() + + if l.currentSegmentFile == nil || l.currentSegmentSize > DefaultSegmentSize { + if err := l.newSegmentFile(); err != nil { + // fail hard since we can't write data + panic(fmt.Sprintf("error opening new segment file for wal: %s", err.Error())) + } + } + + // The panics here are an intentional choice. Based on reports from users + // it's better to fail hard if the database can't take writes. Then they'll + // get alerted and fix whatever is broken. Remove these and face Paul's wrath. 
+ if _, err := l.currentSegmentFile.Write([]byte{byte(writeType)}); err != nil { + panic(fmt.Sprintf("error writing type to wal: %s", err.Error())) + } + if _, err := l.currentSegmentFile.Write(u32tob(uint32(len(data)))); err != nil { + panic(fmt.Sprintf("error writing len to wal: %s", err.Error())) + } + if _, err := l.currentSegmentFile.Write(data); err != nil { + panic(fmt.Sprintf("error writing data to wal: %s", err.Error())) + } + + l.currentSegmentSize += 5 + len(data) + + return l.currentSegmentFile.Sync() +} + +// Flush will force a flush of the WAL to the index +func (l *Log) Flush() error { + return l.flush(idleFlush) +} + +func (l *Log) DeleteMeasurement(measurement string, keys []string) error { + d := &deleteData{MeasurementName: measurement, Keys: keys} + err := l.writeDeleteEntry(d) + if err != nil { + return err + } + + l.deleteKeysFromCache(keys) + l.deleteMeasurementFromCache(measurement) + + return nil +} + +func (l *Log) deleteMeasurementFromCache(name string) { + l.cacheLock.Lock() + defer l.cacheLock.Unlock() + delete(l.measurementFieldsCache, name) +} + +func (l *Log) writeDeleteEntry(d *deleteData) error { + js, err := json.Marshal(d) + if err != nil { + return err + } + data := snappy.Encode(nil, js) + return l.writeToLog(deleteEntry, data) +} + +func (l *Log) DeleteSeries(keys []string) error { + l.deleteKeysFromCache(keys) + + return l.writeDeleteEntry(&deleteData{Keys: keys}) +} + +func (l *Log) deleteKeysFromCache(keys []string) { + seriesKeys := make(map[string]bool) + for _, k := range keys { + series, _ := seriesAndFieldFromCompositeKey(k) + seriesKeys[series] = true + } + + l.cacheLock.Lock() + defer l.cacheLock.Unlock() + + for _, k := range keys { + delete(l.cache, k) + } + + // now remove any of these that are marked for creation + var seriesCreate []*tsdb.SeriesCreate + for _, sc := range l.seriesToCreateCache { + if _, ok := seriesKeys[sc.Series.Key]; !ok { + seriesCreate = append(seriesCreate, sc) + } + } + 
l.seriesToCreateCache = seriesCreate +} + +// Close will finish any flush that is currently in process and close file handles +func (l *Log) Close() error { + l.writeLock.Lock() + l.cacheLock.Lock() + defer l.writeLock.Unlock() + defer l.cacheLock.Unlock() + + l.cache = nil + l.measurementFieldsCache = nil + l.seriesToCreateCache = nil + if l.currentSegmentFile == nil { + return nil + } + if err := l.currentSegmentFile.Close(); err != nil { + return err + } + l.currentSegmentFile = nil + + return nil +} + +// close all the open Log partitions and file handles +func (l *Log) close() error { + l.cache = nil + l.cacheDirtySort = nil + if l.currentSegmentFile == nil { + return nil + } + if err := l.currentSegmentFile.Close(); err != nil { + return err + } + l.currentSegmentFile = nil + + return nil +} + +// flush writes all wal data in memory to the index +func (l *Log) flush(flush flushType) error { + // only flush if there isn't one already running. Memory flushes are only triggered + // by writes, which will mark the flush as running, so we can ignore it. 
+ l.cacheLock.Lock() + + if l.flushRunning && flush != memoryFlush { + l.cacheLock.Unlock() + return nil + } + + // mark the flush as running and ensure that it gets marked as not running when we return + l.flushRunning = true + defer func() { + l.cacheLock.Lock() + l.flushRunning = false + l.cacheLock.Unlock() + }() + + // only hold the lock while we rotate the segment file + l.writeLock.Lock() + lastFileID := l.currentSegmentID + // if it's an idle flush, don't open a new segment file + if flush == idleFlush { + if l.currentSegmentFile != nil { + if err := l.currentSegmentFile.Close(); err != nil { + return err + } + l.currentSegmentFile = nil + l.currentSegmentSize = 0 + } + } else { + if err := l.newSegmentFile(); err != nil { + // there's no recovering from this, fail hard + panic(fmt.Sprintf("error creating new wal file: %s", err.Error())) + } + } + l.writeLock.Unlock() + + // copy the cache items to new maps so we can empty them out + l.flushCache = make(map[string]Values) + valueCount := 0 + for key, v := range l.cache { + l.flushCache[key] = v + valueCount += len(v) + } + l.cache = make(map[string]Values) + for k, _ := range l.cacheDirtySort { + l.flushCache[k] = l.flushCache[k].Deduplicate() + } + l.cacheDirtySort = make(map[string]bool) + + flushSize := l.memorySize + + // reset the memory being used by the cache + l.memorySize = 0 + + // reset the measurements for flushing + mfc := l.measurementFieldsCache + l.measurementFieldsCache = make(map[string]*tsdb.MeasurementFields) + + // reset the series for flushing + scc := l.seriesToCreateCache + l.seriesToCreateCache = nil + + l.cacheLock.Unlock() + + // exit if there's nothing to flush to the index + if len(l.flushCache) == 0 && len(mfc) == 0 && len(scc) == 0 && flush != startupFlush { + return nil + } + + if l.LoggingEnabled { + ftype := "idle" + if flush == memoryFlush { + ftype = "memory" + } else if flush == startupFlush { + ftype = "startup" + } + l.logger.Printf("%s flush of %s with %d keys and %d 
total values of %d bytes\n", ftype, l.path, len(l.flushCache), valueCount, flushSize) + } + + startTime := time.Now() + if err := l.Index.Write(l.flushCache, mfc, scc); err != nil { + return err + } + if l.LoggingEnabled { + l.logger.Printf("%s flush to index took %s\n", l.path, time.Since(startTime)) + } + + l.cacheLock.Lock() + l.flushCache = nil + l.cacheLock.Unlock() + + // remove all the old segment files + fileNames, err := l.segmentFileNames() + if err != nil { + return err + } + for _, fn := range fileNames { + id, err := idFromFileName(fn) + if err != nil { + return err + } + if id <= lastFileID { + err := os.Remove(fn) + if err != nil { + return err + } + } + } + + return nil +} + +// segmentFileNames will return all files that are WAL segment files in sorted order by ascending ID +func (l *Log) segmentFileNames() ([]string, error) { + names, err := filepath.Glob(filepath.Join(l.path, fmt.Sprintf("%s*.%s", WALFilePrefix, WALFileExtension))) + if err != nil { + return nil, err + } + sort.Strings(names) + return names, nil +} + +// newSegmentFile will close the current segment file and open a new one, updating bookkeeping info on the log +func (l *Log) newSegmentFile() error { + l.currentSegmentID += 1 + if l.currentSegmentFile != nil { + if err := l.currentSegmentFile.Close(); err != nil { + return err + } + } + + fileName := filepath.Join(l.path, fmt.Sprintf("%s%05d.%s", WALFilePrefix, l.currentSegmentID, WALFileExtension)) + ff, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, 0666) + if err != nil { + return err + } + l.currentSegmentSize = 0 + l.currentSegmentFile = ff + + return nil +} + +// shouldFlush will return the flushType specifying whether we should flush. 
memoryFlush +// is never returned from this function since those can only be triggered by writes +func (l *Log) shouldFlush() flushType { + l.cacheLock.RLock() + defer l.cacheLock.RUnlock() + + if l.flushRunning { + return noFlush + } + + if len(l.cache) == 0 { + return noFlush + } + + if time.Since(l.lastWriteTime) > l.FlushColdInterval { + return idleFlush + } + + return noFlush +} + +// cursor is a unidirectional iterator for a given entry in the cache +type walCursor struct { + cache Values + position int + ascending bool +} + +func newWALCursor(cache Values, ascending bool) *walCursor { + // position is set such that a call to Next will successfully advance + // to the next postion and return the value. + c := &walCursor{cache: cache, ascending: ascending, position: -1} + if !ascending { + c.position = len(c.cache) + } + return c +} + +func (c *walCursor) Ascending() bool { return c.ascending } + +// Seek will point the cursor to the given time (or key) +func (c *walCursor) SeekTo(seek int64) (int64, interface{}) { + // Seek cache index + c.position = sort.Search(len(c.cache), func(i int) bool { + return c.cache[i].Time().UnixNano() >= seek + }) + + // If seek is not in the cache, return the last value in the cache + if !c.ascending && c.position >= len(c.cache) { + c.position = len(c.cache) - 1 + } + + // Make sure our position points to something in the cache + if c.position < 0 || c.position >= len(c.cache) { + return tsdb.EOF, nil + } + + v := c.cache[c.position] + + return v.Time().UnixNano(), v.Value() +} + +// Next moves the cursor to the next key/value. 
will return nil if at the end +func (c *walCursor) Next() (int64, interface{}) { + var v Value + if c.ascending { + v = c.nextForward() + } else { + v = c.nextReverse() + } + + return v.Time().UnixNano(), v.Value() +} + +// nextForward advances the cursor forward returning the next value +func (c *walCursor) nextForward() Value { + c.position++ + + if c.position >= len(c.cache) { + return &EmptyValue{} + } + + return c.cache[c.position] +} + +// nextReverse advances the cursor backwards returning the next value +func (c *walCursor) nextReverse() Value { + c.position-- + + if c.position < 0 { + return &EmptyValue{} + } + + return c.cache[c.position] +} + +// deleteData holds the information for a delete entry +type deleteData struct { + // MeasurementName will be empty for deletes that are only against series + MeasurementName string + Keys []string +} + +// idFromFileName parses the segment file ID from its name +func idFromFileName(name string) (int, error) { + parts := strings.Split(filepath.Base(name), ".") + if len(parts) != 2 { + return 0, fmt.Errorf("file %s has wrong name format to have an id", name) + } + + id, err := strconv.ParseUint(parts[0][1:], 10, 32) + + return int(id), err +} diff --git a/tsdb/engine/tsm1/wal_test.go b/tsdb/engine/tsm1/wal_test.go new file mode 100644 index 00000000000..9df191c7a7e --- /dev/null +++ b/tsdb/engine/tsm1/wal_test.go @@ -0,0 +1,178 @@ +package tsm1_test + +import ( + "io/ioutil" + "os" + "reflect" + "testing" + + "github.com/influxdb/influxdb/models" + "github.com/influxdb/influxdb/tsdb" + "github.com/influxdb/influxdb/tsdb/engine/tsm1" +) + +func TestWAL_TestWriteQueryOpen(t *testing.T) { + w := NewWAL() + defer w.Cleanup() + + var vals map[string]tsm1.Values + var fields map[string]*tsdb.MeasurementFields + var series []*tsdb.SeriesCreate + + w.Index = &MockIndexWriter{ + fn: func(valuesByKey map[string]tsm1.Values, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error 
{ + vals = valuesByKey + fields = measurementFieldsToSave + series = seriesToCreate + return nil + }, + } + + if err := w.Open(); err != nil { + t.Fatalf("error opening: %s", err.Error()) + } + + p1 := parsePoint("cpu,host=A value=1.1 1000000000") + p2 := parsePoint("cpu,host=B value=1.2 1000000000") + p3 := parsePoint("cpu,host=A value=2.1 2000000000") + p4 := parsePoint("cpu,host=B value=2.2 2000000000") + fieldsToWrite := map[string]*tsdb.MeasurementFields{"foo": {Fields: map[string]*tsdb.Field{"bar": {Name: "value"}}}} + seriesToWrite := []*tsdb.SeriesCreate{{Measurement: "asdf"}} + + if err := w.WritePoints([]models.Point{p1, p2}, fieldsToWrite, seriesToWrite); err != nil { + t.Fatalf("failed to write points: %s", err.Error()) + } + + fieldNames := []string{"value"} + var codec *tsdb.FieldCodec + + c := w.Cursor("cpu,host=A", fieldNames, codec, true) + k, v := c.Next() + if k != p1.UnixNano() { + t.Fatalf("p1 time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + if 1.1 != v { + t.Fatal("p1 data not equal") + } + c = w.Cursor("cpu,host=B", fieldNames, codec, true) + k, v = c.Next() + if k != p2.UnixNano() { + t.Fatalf("p2 time wrong:\n\texp:%d\n\tgot:%d\n", p2.UnixNano(), k) + } + if 1.2 != v { + t.Fatal("p2 data not equal") + } + + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF", k, v) + } + + // ensure we can do another write to the wal and get stuff + if err := w.WritePoints([]models.Point{p3}, nil, nil); err != nil { + t.Fatalf("failed to write: %s", err.Error()) + } + + c = w.Cursor("cpu,host=A", fieldNames, codec, true) + k, v = c.Next() + if k != p1.UnixNano() { + t.Fatalf("p1 time wrong:\n\texp:%d\n\tgot:%d\n", p1.UnixNano(), k) + } + if 1.1 != v { + t.Fatal("p1 data not equal") + } + k, v = c.Next() + if k != p3.UnixNano() { + t.Fatalf("p3 time wrong:\n\texp:%d\n\tgot:%d\n", p3.UnixNano(), k) + } + if 2.1 != v { + t.Fatal("p3 data not equal") + } + + // ensure we can seek + k, v = c.SeekTo(2000000000) + if k != p3.UnixNano() { + 
t.Fatalf("p3 time wrong:\n\texp:%d\n\tgot:%d\n", p3.UnixNano(), k) + } + if 2.1 != v { + t.Fatal("p3 data not equal") + } + k, v = c.Next() + if k != tsdb.EOF { + t.Fatal("expected EOF") + } + + // ensure we close and after open it flushes to the index + if err := w.Close(); err != nil { + t.Fatalf("failed to close: %s", err.Error()) + } + + if err := w.Open(); err != nil { + t.Fatalf("failed to open: %s", err.Error()) + } + + if len(vals[tsm1.SeriesFieldKey("cpu,host=A", "value")]) != 2 { + t.Fatal("expected host A values to flush to index on open") + } + + if len(vals[tsm1.SeriesFieldKey("cpu,host=B", "value")]) != 1 { + t.Fatal("expected host B values to flush to index on open") + } + + if err := w.WritePoints([]models.Point{p4}, nil, nil); err != nil { + t.Fatalf("failed to write: %s", err.Error()) + } + c = w.Cursor("cpu,host=B", fieldNames, codec, true) + k, v = c.Next() + if k != p4.UnixNano() { + t.Fatalf("p4 time wrong:\n\texp:%d\n\tgot:%d\n", p4.UnixNano(), k) + } + if 2.2 != v { + t.Fatal("p4 data not equal") + } + + if !reflect.DeepEqual(fields, fieldsToWrite) { + t.Fatal("fields not flushed") + } + + if !reflect.DeepEqual(series, seriesToWrite) { + t.Fatal("series not flushed") + } +} + +type Log struct { + *tsm1.Log + path string +} + +func NewWAL() *Log { + dir, err := ioutil.TempDir("", "tsm1-test") + if err != nil { + panic("couldn't get temp dir") + } + + l := &Log{ + Log: tsm1.NewLog(dir), + path: dir, + } + l.LoggingEnabled = true + return l +} + +func (l *Log) Cleanup() error { + l.Close() + os.RemoveAll(l.path) + return nil +} + +type MockIndexWriter struct { + fn func(valuesByKey map[string]tsm1.Values, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error +} + +func (m *MockIndexWriter) Write(valuesByKey map[string]tsm1.Values, measurementFieldsToSave map[string]*tsdb.MeasurementFields, seriesToCreate []*tsdb.SeriesCreate) error { + return m.fn(valuesByKey, measurementFieldsToSave, 
seriesToCreate) +} + +func (m *MockIndexWriter) MarkDeletes(keys []string) {} + +func (m *MockIndexWriter) MarkMeasurementDelete(name string) {} diff --git a/tsdb/engine/tsm1/write_lock.go b/tsdb/engine/tsm1/write_lock.go new file mode 100644 index 00000000000..f4514e58aaa --- /dev/null +++ b/tsdb/engine/tsm1/write_lock.go @@ -0,0 +1,96 @@ +package tsm1 + +import ( + "reflect" + "sync" +) + +// writeLock is a lock that enables locking of ranges between a +// min and max value. We use this so that flushes from the WAL +// can occur concurrently along with compactions. +type WriteLock struct { + rangesLock sync.Mutex + ranges []*rangeLock +} + +// LockRange will ensure an exclusive lock between the min and +// max values inclusive. Any subsequent calls that have an +// an overlapping range will have to wait until the previous +// lock is released. A corresponding call to UnlockRange should +// be deferred. +func (w *WriteLock) LockRange(min, max int64) { + r := &rangeLock{min: min, max: max} + for { + ranges := w.currentlyLockedRanges() + + // ensure there are no currently locked ranges that overlap + for _, rr := range ranges { + if rr.overlaps(r) { + // wait until it gets unlocked + rr.mu.Lock() + // release the lock so the object can get GC'd + rr.mu.Unlock() + } + } + + // ensure that no one else got a lock on the range while we + // were waiting + w.rangesLock.Lock() + if len(w.ranges) == 0 || reflect.DeepEqual(ranges, w.ranges) { + // and lock the range + r.mu.Lock() + + // now that we know the range is free, add it to the locks + w.ranges = append(w.ranges, r) + w.rangesLock.Unlock() + return + } + + // try again + w.rangesLock.Unlock() + } +} + +// UnlockRange will release a previously locked range. 
+func (w *WriteLock) UnlockRange(min, max int64) { + w.rangesLock.Lock() + defer w.rangesLock.Unlock() + + // take the range out of the slice and unlock it + a := make([]*rangeLock, 0) + for _, r := range w.ranges { + if r.min == min && r.max == max { + r.mu.Unlock() + continue + } + a = append(a, r) + } + w.ranges = a +} + +func (w *WriteLock) currentlyLockedRanges() []*rangeLock { + w.rangesLock.Lock() + defer w.rangesLock.Unlock() + a := make([]*rangeLock, len(w.ranges)) + copy(a, w.ranges) + return a +} + +type rangeLock struct { + mu sync.Mutex + min int64 + max int64 +} + +func (r *rangeLock) overlaps(l *rangeLock) bool { + if l.min >= r.min && l.min <= r.max { + return true + } else if l.max >= r.min && l.max <= r.max { + return true + } else if l.min <= r.min && l.max >= r.max { + return true + } else if l.min >= r.min && l.max <= r.max { + return true + } + return false +} diff --git a/tsdb/engine/tsm1/write_lock_test.go b/tsdb/engine/tsm1/write_lock_test.go new file mode 100644 index 00000000000..7fa17c530c5 --- /dev/null +++ b/tsdb/engine/tsm1/write_lock_test.go @@ -0,0 +1,131 @@ +package tsm1_test + +import ( + // "sync" + "testing" + "time" + + "github.com/influxdb/influxdb/tsdb/engine/tsm1" +) + +func TestWriteLock_FullCover(t *testing.T) { + w := &tsm1.WriteLock{} + w.LockRange(2, 10) + + lock := make(chan bool) + timeout := time.NewTimer(10 * time.Millisecond) + go func() { + w.LockRange(1, 11) + lock <- true + }() + select { + case <-lock: + t.Fatal("able to get lock when we shouldn't") + case <-timeout.C: + // we're all good + } +} + +func TestWriteLock_RightIntersect(t *testing.T) { + w := &tsm1.WriteLock{} + w.LockRange(2, 10) + + lock := make(chan bool) + timeout := time.NewTimer(10 * time.Millisecond) + go func() { + w.LockRange(5, 15) + lock <- true + }() + select { + case <-lock: + t.Fatal("able to get lock when we shouldn't") + case <-timeout.C: + // we're all good + } +} + +func TestWriteLock_LeftIntersect(t *testing.T) { + w := 
&tsm1.WriteLock{} + w.LockRange(1, 4) + + lock := make(chan bool) + timeout := time.NewTimer(10 * time.Millisecond) + go func() { + w.LockRange(1, 11) + lock <- true + }() + select { + case <-lock: + t.Fatal("able to get lock when we shouldn't") + case <-timeout.C: + // we're all good + } +} + +func TestWriteLock_Inside(t *testing.T) { + w := &tsm1.WriteLock{} + w.LockRange(4, 8) + + lock := make(chan bool) + timeout := time.NewTimer(10 * time.Millisecond) + go func() { + w.LockRange(1, 11) + lock <- true + }() + select { + case <-lock: + t.Fatal("able to get lock when we shouldn't") + case <-timeout.C: + // we're all good + } +} + +func TestWriteLock_Same(t *testing.T) { + w := &tsm1.WriteLock{} + w.LockRange(2, 10) + + lock := make(chan bool) + timeout := time.NewTimer(10 * time.Millisecond) + go func() { + w.LockRange(2, 10) + lock <- true + }() + select { + case <-lock: + t.Fatal("able to get lock when we shouldn't") + case <-timeout.C: + // we're all good + } +} + +// func TestWriteLock_FreeRangeWithContentionElsewhere(t *testing.T) { +// w := &tsm1.WriteLock{} +// w.LockRange(2, 10) + +// lock := make(chan bool) +// freeRange := make(chan bool) +// timeout := time.NewTimer(10 * time.Millisecond) +// var wg sync.WaitGroup + +// wg.Add(1) +// go func() { +// wg.Done() +// w.LockRange(4, 12) +// lock <- true +// }() + +// // make sure the other go func has gotten to the point of requesting the lock +// wg.Wait() +// go func() { +// w.LockRange(15, 23) +// freeRange <- true +// }() +// select { +// case <-lock: +// t.Fatal("able to get lock when we shouldn't") +// case <-timeout.C: +// t.Fatal("unable to get lock of free range when contention exists elsewhere") +// case <-freeRange: +// // we're all good +// } +// } diff --git a/tsdb/shard.go b/tsdb/shard.go index e355cc661de..3a7215e0857 100644 --- a/tsdb/shard.go +++ b/tsdb/shard.go @@ -16,7 +16,6 @@ import ( "github.com/influxdb/influxdb/models" "github.com/influxdb/influxdb/tsdb/internal" - 
"github.com/boltdb/bolt" "github.com/gogo/protobuf/proto" ) @@ -49,7 +48,6 @@ var ( // Data can be split across many shards. The query engine in TSDB is responsible // for combining the output of many shards into a single query result. type Shard struct { - db *bolt.DB // underlying data store index *DatabaseIndex path string walPath string @@ -91,6 +89,12 @@ func NewShard(id uint64, index *DatabaseIndex, path string, walPath string, opti // Path returns the path set on the shard when it was created. func (s *Shard) Path() string { return s.path } +// PerformMaintenance gets called periodically to have the engine perform +// any maintenance tasks like WAL flushing and compaction +func (s *Shard) PerformMaintenance() { + s.engine.PerformMaintenance() +} + // open initializes and opens the shard's store. func (s *Shard) Open() error { if err := func() error { @@ -121,7 +125,7 @@ func (s *Shard) Open() error { } // Load metadata index. - if err := s.engine.LoadMetadataIndex(s.index, s.measurementFields); err != nil { + if err := s.engine.LoadMetadataIndex(s, s.index, s.measurementFields); err != nil { return fmt.Errorf("load metadata index: %s", err) } @@ -229,27 +233,30 @@ func (s *Shard) WritePoints(points []models.Point) error { } // make sure all data is encoded before attempting to save to bolt - for _, p := range points { - // Ignore if raw data has already been marshaled. - if p.Data() != nil { - continue - } + // only required for the b1 and bz1 formats + if s.engine.Format() != TSM1Format { + for _, p := range points { + // Ignore if raw data has already been marshaled. + if p.Data() != nil { + continue + } - // This was populated earlier, don't need to validate that it's there. - s.mu.RLock() - mf := s.measurementFields[p.Name()] - s.mu.RUnlock() + // This was populated earlier, don't need to validate that it's there. 
+ s.mu.RLock() + mf := s.measurementFields[p.Name()] + s.mu.RUnlock() - // If a measurement is dropped while writes for it are in progress, this could be nil - if mf == nil { - return ErrFieldNotFound - } + // If a measurement is dropped while writes for it are in progress, this could be nil + if mf == nil { + return ErrFieldNotFound + } - data, err := mf.Codec.EncodeFields(p.Fields()) - if err != nil { - return err + data, err := mf.Codec.EncodeFields(p.Fields()) + if err != nil { + return err + } + p.SetData(data) } - p.SetData(data) } // Write to the engine. @@ -360,7 +367,9 @@ func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*FieldCreate) (map[ measurementsToSave[f.Measurement] = m // add the field to the in memory index - if err := m.CreateFieldIfNotExists(f.Field.Name, f.Field.Type); err != nil { + // only limit the field count for non-tsm engines + limitFieldCount := s.engine.Format() == B1Format || s.engine.Format() == BZ1Format + if err := m.CreateFieldIfNotExists(f.Field.Name, f.Field.Type, limitFieldCount); err != nil { return nil, err } @@ -468,7 +477,7 @@ func (m *MeasurementFields) UnmarshalBinary(buf []byte) error { // CreateFieldIfNotExists creates a new field with an autoincrementing ID. // Returns an error if 255 fields have already been created on the measurement or // the fields already exists with a different type. -func (m *MeasurementFields) CreateFieldIfNotExists(name string, typ influxql.DataType) error { +func (m *MeasurementFields) CreateFieldIfNotExists(name string, typ influxql.DataType, limitCount bool) error { // Ignore if the field already exists. if f := m.Fields[name]; f != nil { if f.Type != typ { @@ -477,8 +486,8 @@ func (m *MeasurementFields) CreateFieldIfNotExists(name string, typ influxql.Dat return nil } - // Only 255 fields are allowed. If we go over that then return an error. - if len(m.Fields)+1 > math.MaxUint8 { + // If we're supposed to limit the number of fields, only 255 are allowed. 
If we go over that then return an error. + if len(m.Fields)+1 > math.MaxUint8 && limitCount { return ErrFieldOverflow } @@ -741,15 +750,22 @@ func (f *FieldCodec) DecodeByID(targetID uint8, b []byte) (interface{}, error) { // DecodeByName scans a byte slice for a field with the given name, converts it to its // expected type, and return that value. func (f *FieldCodec) DecodeByName(name string, b []byte) (interface{}, error) { - fi := f.fieldByName(name) + fi := f.FieldByName(name) if fi == nil { return 0, ErrFieldNotFound } return f.DecodeByID(fi.ID, b) } +func (f *FieldCodec) Fields() (a []*Field) { + for _, f := range f.fieldsByID { + a = append(a, f) + } + return +} + // FieldByName returns the field by its name. It will return a nil if not found -func (f *FieldCodec) fieldByName(name string) *Field { +func (f *FieldCodec) FieldByName(name string) *Field { return f.fieldsByName[name] } diff --git a/tsdb/snapshot_writer.go b/tsdb/snapshot_writer.go index 785ca13908c..4a0a2d3edef 100644 --- a/tsdb/snapshot_writer.go +++ b/tsdb/snapshot_writer.go @@ -8,7 +8,6 @@ import ( "path/filepath" "time" - "github.com/boltdb/bolt" "github.com/influxdb/influxdb/snapshot" ) @@ -83,7 +82,7 @@ func appendShardSnapshotFile(sw *snapshot.Writer, sh *Shard, name string) error } // Begin transaction. - tx, err := sh.db.Begin(false) + tx, err := sh.engine.Begin(false) if err != nil { return fmt.Errorf("begin: %s", err) } @@ -103,7 +102,7 @@ func appendShardSnapshotFile(sw *snapshot.Writer, sh *Shard, name string) error // boltTxCloser wraps a Bolt transaction to implement io.Closer. type boltTxCloser struct { - *bolt.Tx + Tx } // Close rolls back the transaction. 
diff --git a/tsdb/store.go b/tsdb/store.go index 13235a16844..be7076d00e2 100644 --- a/tsdb/store.go +++ b/tsdb/store.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/influxdb/influxdb/influxql" "github.com/influxdb/influxdb/models" @@ -27,6 +28,11 @@ func NewStore(path string) *Store { var ( ErrShardNotFound = fmt.Errorf("shard not found") + ErrStoreClosed = fmt.Errorf("store is closed") +) + +const ( + MaintenanceCheckInterval = time.Minute ) type Store struct { @@ -38,7 +44,10 @@ type Store struct { EngineOptions EngineOptions Logger *log.Logger - closing chan struct{} + + closing chan struct{} + wg sync.WaitGroup + opened bool } // Path returns the store's root path. @@ -71,7 +80,7 @@ func (s *Store) CreateShard(database, retentionPolicy string, shardID uint64) er select { case <-s.closing: - return fmt.Errorf("closing") + return ErrStoreClosed default: } @@ -124,7 +133,7 @@ func (s *Store) DeleteShard(shardID uint64) error { return err } - if err := os.Remove(sh.path); err != nil { + if err := os.RemoveAll(sh.path); err != nil { return err } @@ -301,6 +310,41 @@ func (s *Store) loadShards() error { } +// periodicMaintenance is the method called in a goroutine on the opening of the store +// to perform periodic maintenance of the shards. +func (s *Store) periodicMaintenance() { + t := time.NewTicker(MaintenanceCheckInterval) + for { + select { + case <-t.C: + s.performMaintenance() + case <-s.closing: + t.Stop() + return + } + } +} + +// performMaintenance will loop through the shards and tell them +// to perform any maintenance tasks. Those tasks should kick off +// their own goroutines if it's anything that could take time. 
+func (s *Store) performMaintenance() { + s.mu.Lock() + defer s.mu.Unlock() + for _, sh := range s.shards { + s.performMaintenanceOnShard(sh) + } +} + +func (s *Store) performMaintenanceOnShard(shard *Shard) { + defer func() { + if r := recover(); r != nil { + s.Logger.Printf("recovered error in maintenance on shard %d", shard.id) + } + }() + shard.PerformMaintenance() +} + func (s *Store) Open() error { s.mu.Lock() defer s.mu.Unlock() @@ -326,12 +370,22 @@ return err } + go s.periodicMaintenance() + s.opened = true + return nil } func (s *Store) WriteToShard(shardID uint64, points []models.Point) error { s.mu.RLock() defer s.mu.RUnlock() + + select { + case <-s.closing: + return ErrStoreClosed + default: + } + sh, ok := s.shards[shardID] if !ok { return ErrShardNotFound } @@ -367,15 +421,17 @@ func (s *Store) Close() error { s.mu.Lock() defer s.mu.Unlock() + if s.opened { + close(s.closing) + } + s.wg.Wait() + for _, sh := range s.shards { if err := sh.Close(); err != nil { return err } } - if s.closing != nil { - close(s.closing) - } - s.closing = nil + s.opened = false s.shards = nil s.databaseIndexes = nil