This repository was archived by the owner on Feb 21, 2024. It is now read-only.

Commit 54dbeec

support null/non-null tests for non-BSI fields
There's a lot going on here. First, we were treating "the test is a Condition" as implying BSI, which it no longer does. Second, the behavior of conditions was weird and BSI-specific. Third, we had to propagate these changes and features through a lot of code: the core featurebase code, the DAX replacements/copies of it, and the SQL3 layer.

We refactor this so that tests for equality and inequality work for non-BSI fields; now, if you accidentally use `==` in a Row call on a non-BSI field, it still works, because that syntax is no longer specific to BSI fields.

We add a TrackExistence flag to fields and propagate it through things like our protobuf code so that we can successfully create fields. Newly created fields get this flag by default, because we add it unconditionally to them, but the paths that are called with existing fields don't add it. So when we "create" (really, just load the definition of) a field from something stored in the schema, we don't add TrackExistence to it, but any path that creates a genuinely new field should. A time quantum field with NoStandardView will *effectively* lack TrackExistence.

For sets, mutexes, and time quantums with a standard view, anything that sets bits also sets a corresponding bit for the record in a new "existence" view. This lets us distinguish between an empty set and a null, and makes null checks constant-time. When clearing bits, we don't clear existence bits, EXCEPT that if you clear a bit in a mutex *and the bit actually existed*, we clear the existence bit. For sets and time quantums, clearing bits never clears the existence bit. Deleting records clears the existence bit.

We also add code to the `batch` subpackage to generate suitable existence-field bitmaps and import them. This logic correctly handles empty sets and nils.
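The set-versus-mutex clear semantics described above can be sketched with a toy in-memory model. The type and helper names here are invented for illustration; the real code operates on roaring bitmap views per fragment, not Go maps.

```go
package main

import "fmt"

// toyField is a hypothetical stand-in for a set or mutex field with an
// "existence" view, modeling only the set/clear rules from the commit message.
type toyField struct {
	mutex  bool
	bits   map[uint64]map[uint64]bool // record -> set of row IDs
	exists map[uint64]bool            // the "existence" view
}

func newToyField(mutex bool) *toyField {
	return &toyField{
		mutex:  mutex,
		bits:   map[uint64]map[uint64]bool{},
		exists: map[uint64]bool{},
	}
}

// Set sets a bit and unconditionally marks the record as existing.
func (f *toyField) Set(record, row uint64) {
	if f.bits[record] == nil {
		f.bits[record] = map[uint64]bool{}
	}
	f.bits[record][row] = true
	f.exists[record] = true
}

// Clear clears a bit. For sets, the existence bit is never cleared; for a
// mutex, it is cleared only if the bit was actually present.
func (f *toyField) Clear(record, row uint64) {
	had := f.bits[record][row]
	delete(f.bits[record], row)
	if f.mutex && had {
		delete(f.exists, record)
	}
}

func main() {
	set := newToyField(false)
	set.Set(1, 7)
	set.Clear(1, 7)
	fmt.Println("set field still non-null:", set.exists[1]) // empty set, not null

	mux := newToyField(true)
	mux.Set(2, 3)
	mux.Clear(2, 3) // the bit existed, so the record becomes null again
	fmt.Println("mutex now null:", !mux.exists[2])
}
```

This is why the mutex case needs to know which *specific* bits a clear actually removed, not just how many.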
The `batch` package does not allow specifying anything equivalent to clearing a single bit from an existing record, so we don't have to deal with the mutex complexity in that case, which is good because it would be impossible.

This requires a number of other subtle changes, such as allowing new fields to have more than one FieldOption specified for them. We also drop the handful of implementation bits relating to the "fullySorted" internal-use-only import flag, which existed only to support the JSON ingest API, which we've removed.

The most dangerous part of this is that the mutex semantics are impossible to implement on top of our existing API, because they require us to know not how *many* bits we cleared, but which *specific* bits we cleared. I've implemented this as a new Tx method, which is almost certainly going to be tech debt one day; if we some day drop the Import API, we should remove it.

The testing for this currently covers only the Set/Clear behavior of PQL and the Import API. The batch tests haven't been written yet.

Fields that don't have existence tracking enabled refuse to perform null/not-null tests. They should also report themselves as having no null values -- if a record exists, sets in it are considered empty rather than null.

The SQL3 support requires a number of subtle modifications to both featurebase and some addon tooling. The essential change is dropping the unconditional translation of nil slices to non-nil empty slices in translateResult, in both the executor and the orchestrator. We also modify the logic that generates results from Extract calls to ensure that non-null sets get an empty slice created for them even if they never have any values assigned. The expected results for some tests are different now: we expect to get nil slices, rather than zero-length non-nil slices, for fields that were never written for a given record. Most tests were not changed.
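The nil-versus-empty distinction the batch package now relies on can be summarized in a small standalone sketch. The `classify` helper is hypothetical; the batch code makes the same three-way decision inline when building fragments.

```go
package main

import "fmt"

// classify mirrors the distinction described above: a nil []uint64 means "no
// value written" (null, so no existence bit), while a non-nil empty slice
// means "an empty set", which still sets the existence bit for the record.
func classify(rowIDs []uint64) string {
	switch {
	case rowIDs == nil:
		return "null (skip; no existence bit)"
	case len(rowIDs) == 0:
		return "empty set (existence bit only)"
	default:
		return "values (bits plus existence bit)"
	}
}

func main() {
	fmt.Println(classify(nil))
	fmt.Println(classify([]uint64{}))
	fmt.Println(classify([]uint64{3, 9}))
}
```

Note that in Go, `len(val) == 0` is true for both nil and empty slices, which is exactly why the earlier `len(val) == 0` checks had to become `val == nil` checks.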
(In every case, if a test was failing, I actually checked the logic before changing expected results. This required a lot of tracking down of edge cases.)

The batch package now rejects, as an error, attempts to clear single bits from mutex fields, because as far as I can tell it's simply impossible to have a roaring import that specifies the correct semantics there; you can't tell whether to clear an existence bit without access to the currently-set bits, which the batch API doesn't have. We already supported the special case of specifying a clear value of nil for clearing a mutex field; now that is the only value a mutex field may have in row.Clears.

We change the logic for fixing up incoming view names (in two places) to stop assuming that any view in a time field other than "" that doesn't have viewStandard as a prefix is a partial time quantum name that should have "standard_" prepended. This lets us submit bitmaps for "existence" to time quantum fields without having them silently transformed into "standard_existence", which is what we'd do with "202203".

We drop the field ClearBits method, which was totally unused. We drop the sliceDifference function, which was used in a previous mutex implementation and hasn't been used in ages, along with its test case and the helper function used only by that test.
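The corrected view-name fixup, which appears in slightly different forms in importWorker and cleanupView, boils down to the following standalone sketch (the function name is invented; the constants mirror the ones in the source):

```go
package main

import (
	"fmt"
	"strings"
)

const (
	viewStandard  = "standard"
	viewExistence = "existence"
)

// normalizeViewName distills the fixed-up logic: "" maps to the standard
// view, "standard" and "existence" pass through untouched, and only other
// names on time fields (partial time quantum keys like "202203") get the
// "standard_" prefix.
func normalizeViewName(view string, isTimeField bool) string {
	switch view {
	case "":
		return viewStandard
	case viewStandard, viewExistence:
		return view // do nothing, these are fine
	default:
		if isTimeField && !strings.HasPrefix(view, viewStandard) {
			return viewStandard + "_" + view
		}
		return view
	}
}

func main() {
	fmt.Println(normalizeViewName("202203", true))    // standard_202203
	fmt.Println(normalizeViewName("existence", true)) // existence, no longer mangled
	fmt.Println(normalizeViewName("", false))         // standard
}
```

The key change is the dedicated `case viewStandard, viewExistence` arm: previously "existence" fell into the prefixing path for time fields.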
1 parent 0dfaddf commit 54dbeec

38 files changed

Lines changed: 1763 additions & 518 deletions

api.go

Lines changed: 41 additions & 24 deletions
@@ -367,8 +367,8 @@ func (api *API) DeleteIndex(ctx context.Context, indexName string) error {
 }
 
 // CreateField makes the named field in the named index with the given options.
-// This method currently only takes a single functional option, but that may be
-// changed in the future to support multiple options.
+//
+// The resulting field will always have TrackExistence set.
 func (api *API) CreateField(ctx context.Context, indexName string, fieldName string, opts ...FieldOption) (*Field, error) {
 	span, _ := tracing.StartSpanFromContext(ctx, "API.CreateField")
 	defer span.Finish()
@@ -381,6 +381,11 @@ func (api *API) CreateField(ctx context.Context, indexName string, fieldName str
 	// authN/Z info
 	requestUserID, _ := fbcontext.UserID(ctx) // requestUserID is "" if not in ctx
 
+	// newFieldOptions is also used in the path through the index creating
+	// a field from an update from DAX, so it can't assume it can always
+	// override this. But we're the call path for creating new fields, and
+	// new fields should always have TrackExistence on.
+	opts = append(opts, OptFieldTrackExistence())
 	// Apply and validate functional options.
 	fo, err := newFieldOptions(opts...)
 	if err != nil {
@@ -500,10 +505,16 @@ func importWorker(importWork chan importJob) {
 	// incorrectly). One way to address this would be to change the logic
 	// overall so there weren't conflicts. For now, we just
 	// rely on the field type to inform the intended view name.
-	if viewName == "" {
+	// contrast with cleanupView, which is similar but unfortunately not quite identical
+	switch viewName {
+	case "":
 		viewName = viewStandard
-	} else if j.field.Type() == FieldTypeTime {
-		viewName = fmt.Sprintf("%s_%s", viewStandard, viewName)
+	case viewStandard, viewExistence:
+		// do nothing, these are fine
+	default: // possibly a time view
+		if j.field.Type() == FieldTypeTime && !strings.HasPrefix(viewName, viewStandard) {
+			viewName = viewStandard + "_" + viewName
+		}
 	}
 	if len(viewData) == 0 {
 		return fmt.Errorf("no data to import for view: %s", viewName)
@@ -1316,7 +1327,6 @@ type ImportOptions struct {
 	Clear          bool
 	IgnoreKeyCheck bool
 	Presorted      bool
-	fullySorted    bool // format-aware sorting, internal use only please.
 	suppressLog    bool
 
 	// test Tx atomicity if > 0
@@ -1523,7 +1533,6 @@ func (api *API) ImportWithTx(ctx context.Context, qcx *Qcx, req *ImportRequest,
 		return errors.Wrap(err, "validating api method")
 	}
 
-	api.server.logger.Debugf("ImportWithTx: %v %v %v", req.Index, req.Field, req.Shard)
 	idx, field, err := api.indexField(req.Index, req.Field, req.Shard)
 	if err != nil {
 		return errors.Wrap(err, "getting index and field")
@@ -1642,6 +1651,12 @@ func (api *API) ImportWithTx(ctx context.Context, qcx *Qcx, req *ImportRequest,
 // across many fields in a single shard. It can both set and clear
 // bits and updates caches/bitDepth as appropriate, although only the
 // bitmap parts happen truly transactionally.
+//
+// This function does not attempt to do existence tracking, because
+// it can't; there's no way to distinguish empty sets from not setting
+// bits. As a result, users of this endpoint are responsible for
+// providing corrected existence views for fields with existence
+// tracking. Our batch API does that.
 func (api *API) ImportRoaringShard(ctx context.Context, indexName string, shard uint64, req *ImportRoaringShardRequest) error {
 	index, err := api.Index(ctx, indexName)
 	if err != nil {
@@ -1768,12 +1783,15 @@ func cleanupView(fieldType string, viewUpdate *RoaringUpdate) error {
 	// TODO wouldn't hurt to have consolidated logic somewhere for validating view names.
 	switch fieldType {
 	case FieldTypeSet, FieldTypeTime:
-		if viewUpdate.View == "" {
-			viewUpdate.View = "standard"
-		}
-		// add 'standard_' if we just have a time... this is how IDK works by default
-		if fieldType == FieldTypeTime && !strings.HasPrefix(viewUpdate.View, viewStandard) {
-			viewUpdate.View = fmt.Sprintf("%s_%s", viewStandard, viewUpdate.View)
+		switch viewUpdate.View {
+		case "":
+			viewUpdate.View = viewStandard
+		case viewStandard, viewExistence:
+			// do nothing, these are fine
+		default:
+			if fieldType == FieldTypeTime && !strings.HasPrefix(viewUpdate.View, viewStandard) {
+				viewUpdate.View = viewStandard + "_" + viewUpdate.View
+			}
 		}
 	case FieldTypeInt, FieldTypeDecimal, FieldTypeTimestamp:
 		if viewUpdate.View == "" {
@@ -2038,21 +2056,20 @@ func (api *API) ImportValueWithTx(ctx context.Context, qcx *Qcx, req *ImportValu
 	return nil
 }
 
-func importExistenceColumns(qcx *Qcx, index *Index, columnIDs []uint64, shard uint64) error {
+func importExistenceColumns(qcx *Qcx, index *Index, columnIDs []uint64, shard uint64) (err0 error) {
 	ef := index.existenceField()
 	if ef == nil {
 		return nil
 	}
-
-	existenceRowIDs := make([]uint64, len(columnIDs))
-	// If we don't gratuitously hand-duplicate things in field.Import,
-	// the fact that fragment.bulkImport rewrites its row and column
-	// lists can burn us if we don't make a copy before doing the
-	// existence field write.
-	columnCopy := make([]uint64, len(columnIDs))
-	copy(columnCopy, columnIDs)
-	options := ImportOptions{}
-	return ef.Import(qcx, existenceRowIDs, columnCopy, nil, shard, &options)
+	tx, finisher, err := qcx.GetTx(Txo{Write: true, Index: index, Shard: shard})
+	if err != nil {
+		return err
+	}
+	defer finisher(&err0)
+	// markExistingInView is simpler/faster than Import, but unusually, we use the
+	// standard view of the existence field, instead of the existence view of
+	// a specific field, when doing the index-wide update.
+	return ef.markExistingInView(tx, columnIDs, viewStandard, shard)
 }
 
 // ShardDistribution returns an object representing the distribution of shards

api_directive.go

Lines changed: 1 addition & 1 deletion
@@ -964,7 +964,7 @@ func createField(idx *Index, fld *dax.Field) error {
 		return errors.Wrapf(err, "creating field options from field: %s", fld.Name)
 	}
 
-	if _, err := idx.CreateField(string(fld.Name), "", opts...); err != nil {
+	if _, err := idx.createNullableField(string(fld.Name), "", opts...); err != nil {
 		return errors.Wrapf(err, "creating field on index: %s", fld.Name)
 	}
 	return nil

batch/batch.go

Lines changed: 57 additions & 6 deletions
@@ -573,7 +573,7 @@ func (b *Batch) Add(rec Row) error {
 		case int64:
 			b.values[field.Name] = append(b.values[field.Name], val)
 		case []string:
-			if len(val) == 0 {
+			if val == nil {
 				continue
 			}
 			rowIDSets, ok := b.rowIDSets[field.Name]
@@ -608,7 +608,8 @@ func (b *Batch) Add(rec Row) error {
 			}
 			b.rowIDSets[field.Name] = append(rowIDSets, rowIDs)
 		case []uint64:
-			if len(val) == 0 {
+			// if length is 0, that's still a valid, empty, set
+			if val == nil {
 				continue
 			}
 			rowIDSets, ok := b.rowIDSets[field.Name]
@@ -663,6 +664,9 @@ func (b *Batch) Add(rec Row) error {
 
 	for i, uval := range rec.Clears {
 		field := b.header[i]
+		if field.Options.Type == featurebase.FieldTypeMutex && uval != nil {
+			return errors.Errorf("individual-bit clears not allowed on mutex fields; use nil to clear a mutex")
+		}
 		if _, ok := b.clearRowIDs[i]; !ok {
 			b.clearRowIDs[i] = make(map[int]uint64)
 		}
@@ -1245,7 +1249,7 @@ func (b *Batch) doImport(frags, clearFrags fragments) error {
 			}
 
 			ferr := b.importer.ImportRoaringBitmap(ctx, b.tbl.ID, fld, shard, viewMap, false)
-			b.log.Debugf("imp-roar field: %s, shard:%d, views:%d %v", field, shard, len(clearViewMap), time.Since(starty))
+			b.log.Debugf("imp-roar field: %s, shard:%d, views:%d %v", field, shard, len(viewMap), time.Since(starty))
			return errors.Wrapf(ferr, "importing data for %s", field)
 		})
 	}
@@ -1343,6 +1347,7 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments
 		curShard := ^uint64(0) // impossible sentinel value for shard.
 		var curBM *roaring.Bitmap
 		var clearBM *roaring.Bitmap
+		var existCurBM *roaring.Bitmap
 		for j := range b.ids {
 			col := b.ids[j]
 			row := nilSentinel
@@ -1355,8 +1360,12 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments
 
 			if col/shardWidth != curShard {
 				curShard = col / shardWidth
+				// the API treats "" as standard
 				curBM = frags.GetOrCreate(curShard, field.Name, "")
 				clearBM = clearFrags.GetOrCreate(curShard, field.Name, "")
+				if opts.TrackExistence {
+					existCurBM = frags.GetOrCreate(curShard, field.Name, "existence")
+				}
 			}
 			if row != nilSentinel {
 				// TODO this is super ugly, but we want to avoid setting
@@ -1366,6 +1375,9 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments
 				// the NoStandardView case would be great.
 				if !(opts.Type == featurebase.FieldTypeTime && opts.NoStandardView) {
 					curBM.DirectAdd(row*shardWidth + (col % shardWidth))
+					if opts.TrackExistence {
+						existCurBM.DirectAdd(col % shardWidth)
+					}
 				}
 				if opts.Type == featurebase.FieldTypeTime {
 					views, err := b.times[j].views(opts.TimeQuantum)
@@ -1386,6 +1398,11 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments
 				// we want to make sure that at this point, the "set"
 				// fragments don't contain the bit that we're clearing
 				curBM.DirectRemoveN(clearRow*shardWidth + (col % shardWidth))
+				// don't set the existence bit, probably? i don't actually quite
+				// understand the higher level semantics here.
+				if opts.TrackExistence {
+					existCurBM.DirectRemoveN(col % shardWidth)
+				}
 			}
 		}
 	}
@@ -1404,14 +1421,22 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments
 		opts := field.Options
 		curShard := ^uint64(0) // impossible sentinel value for shard.
 		var curBM *roaring.Bitmap
+		var existCurBM *roaring.Bitmap
 		for j := range b.ids {
 			col, rowIDs := b.ids[j], rowIDSets[j]
-			if len(rowIDs) == 0 {
-				continue
-			}
 			if col/shardWidth != curShard {
 				curShard = col / shardWidth
 				curBM = frags.GetOrCreate(curShard, fname, "")
+				if opts.TrackExistence {
+					existCurBM = frags.GetOrCreate(curShard, fname, "existence")
+				}
+			}
+			if len(rowIDs) == 0 {
+				// you can validly specify an empty set, which is not the same as a null
+				if opts.TrackExistence && !(opts.Type == featurebase.FieldTypeTime && opts.NoStandardView) && rowIDs != nil {
+					existCurBM.DirectAdd(col % shardWidth)
+				}
+				continue
 			}
 			// TODO this is super ugly, but we want to avoid setting
 			// bits on the standard view in the specific case when
@@ -1422,6 +1447,9 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments
 				for _, row := range rowIDs {
 					curBM.DirectAdd(row*shardWidth + (col % shardWidth))
 				}
+				if opts.TrackExistence {
+					existCurBM.DirectAdd(col % shardWidth)
+				}
 			}
 			if opts.Type == featurebase.FieldTypeTime {
 				views, err := b.times[j].views(opts.TimeQuantum)
@@ -1549,6 +1577,11 @@ func (b *Batch) makeSingleValFragments(frags, clearFrags fragments) (fragments,
 		shard := ids[0] / shardWidth
 		bitmap := frags.GetOrCreate(shard, field.Name, "standard")
 		clearBM := clearFrags.GetOrCreate(shard, field.Name, "standard")
+		var existBM, existClearBM *roaring.Bitmap
+		if field.Options.TrackExistence {
+			existBM = frags.GetOrCreate(shard, field.Name, "existence")
+			existClearBM = clearFrags.GetOrCreate(shard, field.Name, "existence")
+		}
 		for i, id := range ids {
 			if i+1 < len(ids) {
 				// we only want the last value set for each id
@@ -1561,13 +1594,22 @@ func (b *Batch) makeSingleValFragments(frags, clearFrags fragments) (fragments,
 				shard = id / shardWidth
 				bitmap = frags.GetOrCreate(shard, field.Name, "standard")
 				clearBM = clearFrags.GetOrCreate(shard, field.Name, "standard")
+				if field.Options.TrackExistence {
+					existBM = frags.GetOrCreate(shard, field.Name, "existence")
+					existClearBM = clearFrags.GetOrCreate(shard, field.Name, "existence")
+				}
 			}
 			fragmentColumn := id % shardWidth
 			clearBM.Add(fragmentColumn) // Will use this to clear columns.
 			if row != clearSentinel {
 				// clearSentinel is used for deletion
 				// so this value should only be added if its not clearSentinel
 				bitmap.Add(row*shardWidth + fragmentColumn)
+				if field.Options.TrackExistence {
+					existBM.Add(fragmentColumn)
+				}
+			} else if field.Options.TrackExistence {
+				existClearBM.Add(fragmentColumn)
 			}
 		}
 	}
@@ -1596,6 +1638,11 @@ func (b *Batch) makeSingleValFragments(frags, clearFrags fragments) (fragments,
 			fragmentColumn := recID % shardWidth
 
 			clearBM.Add(fragmentColumn)
+			if field.Options.TrackExistence {
+				existClearBM := clearFrags.GetOrCreate(shard, field.Name, "existence")
+
+				existClearBM.Add(fragmentColumn)
+			}
 		}
 	}
 
@@ -1618,6 +1665,10 @@ func (b *Batch) makeSingleValFragments(frags, clearFrags fragments) (fragments,
 
 			fragmentColumn := recID % shardWidth
 			clearBM.Add(fragmentColumn)
+			if field.Options.TrackExistence {
+				exist := frags.GetOrCreate(shard, field.Name, "existence")
+				exist.Add(fragmentColumn)
+			}
 
 			if boolVal {
 				bitmap.Add(trueRowOffset + fragmentColumn)
batch/batch_test.go

Lines changed: 19 additions & 4 deletions
@@ -103,6 +103,12 @@ func testStringSliceCombos(t *testing.T, importer featurebase.Importer, sapi fea
 		Index: idx.Name,
 		Query: "TopN(a1, n=10)",
 	})
+	if resp.Err != nil {
+		t.Fatalf("unexpected error from TopN query: %v", resp.Err)
+	}
+	if len(resp.Results) < 1 {
+		t.Fatalf("expected non-empty result set, got empty results")
+	}
 	pairsField, ok := resp.Results[0].(*featurebase.PairsField)
 	assert.True(t, ok, "wrong return type: %T", resp.Results[0])
 
@@ -508,10 +514,11 @@ func testStringSliceEmptyAndNil(t *testing.T, importer featurebase.Importer, sap
 		{
 			Name: "strslice",
 			Options: featurebase.FieldOptions{
-				Type:      featurebase.FieldTypeSet,
-				Keys:      true,
-				CacheType: featurebase.CacheTypeRanked,
-				CacheSize: 100,
+				Type:           featurebase.FieldTypeSet,
+				Keys:           true,
+				CacheType:      featurebase.CacheTypeRanked,
+				CacheSize:      100,
+				TrackExistence: true,
 			},
 		},
 	},
@@ -611,6 +618,14 @@ func testStringSliceEmptyAndNil(t *testing.T, importer featurebase.Importer, sap
 			pql: "Row(strslice='z')",
 			exp: []uint64{2},
 		},
+		{
+			pql: "Row(strslice==null)",
+			exp: []uint64{1},
+		},
+		{
+			pql: "Row(strslice!=null)",
+			exp: []uint64{0, 2, 3, 4},
+		},
 	}
 	for i, test := range tests {
 		t.Run(fmt.Sprintf("test-%d", i), func(t *testing.T) {

catcher.go

Lines changed: 11 additions & 0 deletions
@@ -124,6 +124,17 @@ func (c *catcherTx) Remove(index, field, view string, shard uint64, a ...uint64)
 	return c.b.Remove(index, field, view, shard, a...)
 }
 
+func (c *catcherTx) Removed(index, field, view string, shard uint64, a ...uint64) (changed []uint64, err error) {
+
+	defer func() {
+		if r := recover(); r != nil {
+			vprint.AlwaysPrintf("see Removed() PanicOn '%v' at '%v'", r, vprint.Stack())
+			vprint.PanicOn(r)
+		}
+	}()
+	return c.b.Removed(index, field, view, shard, a...)
+}
+
 func (c *catcherTx) Contains(index, field, view string, shard uint64, key uint64) (exists bool, err error) {
 
 	defer func() {

dax/queryer/orchestrator.go

Lines changed: 3 additions & 3 deletions
@@ -3302,6 +3302,9 @@ func (o *orchestrator) translateResult(ctx context.Context, qtbl *dax.QualifiedT
 				return nil, errors.Wrapf(err, "orch: translating IDs of field %q", v)
 			}
 			mapper = func(ids []uint64) (interface{}, error) {
+				if ids == nil {
+					return []string(nil), nil
+				}
 				keys := make([]string, len(ids))
 				for i, id := range ids {
 					keys[i] = translations[id]
@@ -3311,9 +3314,6 @@ func (o *orchestrator) translateResult(ctx context.Context, qtbl *dax.QualifiedT
 		} else {
 			datatype = "[]uint64"
 			mapper = func(ids []uint64) (interface{}, error) {
-				if ids == nil {
-					ids = []uint64{}
-				}
 				return ids, nil
 			}
 		}

dax/table.go

Lines changed: 1 addition & 0 deletions
@@ -823,4 +823,5 @@ type FieldOptions struct {
 	TimeQuantum    TimeQuantum   `json:"time-quantum,omitempty"`
 	TTL            time.Duration `json:"ttl,omitempty"`
 	ForeignIndex   string        `json:"foreign-index,omitempty"`
+	TrackExistence bool          `json:"track-existence"`
 }
