Skip to content

Commit

Permalink
Start migrating Field to &str (#1772)
Browse files Browse the repository at this point in the history
Start migrating Field to &str in preparation for columnar.
Return Result for get_field.
  • Loading branch information
PSeitz authored Jan 18, 2023
1 parent c4af63e commit f687b3a
Show file tree
Hide file tree
Showing 30 changed files with 294 additions and 247 deletions.
12 changes: 7 additions & 5 deletions examples/custom_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use fastfield_codecs::Column;
// Importing tantivy...
use tantivy::collector::{Collector, SegmentCollector};
use tantivy::query::QueryParser;
use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
use tantivy::{doc, Index, Score, SegmentReader};

#[derive(Default)]
Expand Down Expand Up @@ -52,11 +52,11 @@ impl Stats {
}

struct StatsCollector {
field: Field,
field: String,
}

impl StatsCollector {
fn with_field(field: Field) -> StatsCollector {
fn with_field(field: String) -> StatsCollector {
StatsCollector { field }
}
}
Expand All @@ -73,7 +73,7 @@ impl Collector for StatsCollector {
_segment_local_id: u32,
segment_reader: &SegmentReader,
) -> tantivy::Result<StatsSegmentCollector> {
let fast_field_reader = segment_reader.fast_fields().u64(self.field)?;
let fast_field_reader = segment_reader.fast_fields().u64(&self.field)?;
Ok(StatsSegmentCollector {
fast_field_reader,
stats: Stats::default(),
Expand Down Expand Up @@ -171,7 +171,9 @@ fn main() -> tantivy::Result<()> {

// here we want to get a hit on the 'ken' in Frankenstein
let query = query_parser.parse_query("broom")?;
if let Some(stats) = searcher.search(&query, &StatsCollector::with_field(price))? {
if let Some(stats) =
searcher.search(&query, &StatsCollector::with_field("price".to_string()))?
{
println!("count: {}", stats.count());
println!("mean: {}", stats.mean());
println!("standard deviation: {}", stats.standard_deviation());
Expand Down
2 changes: 1 addition & 1 deletion examples/integer_range_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn main() -> Result<()> {
reader.reload()?;
let searcher = reader.searcher();
// The end is excluded i.e. here we are searching up to 1969
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970);
let docs_in_the_sixties = RangeQuery::new_u64("year".to_string(), 1960..1970);
// Uses a Count collector to sum the total number of docs in the range
let num_60s_books = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(num_60s_books, 10);
Expand Down
10 changes: 5 additions & 5 deletions examples/warmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::sync::{Arc, RwLock, Weak};

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Field, Schema, FAST, TEXT};
use tantivy::schema::{Schema, FAST, TEXT};
use tantivy::{
doc, DocAddress, DocId, Index, IndexReader, Opstamp, Searcher, SearcherGeneration, SegmentId,
SegmentReader, Warmer,
Expand All @@ -25,13 +25,13 @@ pub trait PriceFetcher: Send + Sync + 'static {
}

struct DynamicPriceColumn {
field: Field,
field: String,
price_cache: RwLock<HashMap<(SegmentId, Option<Opstamp>), Arc<Vec<Price>>>>,
price_fetcher: Box<dyn PriceFetcher>,
}

impl DynamicPriceColumn {
pub fn with_product_id_field<T: PriceFetcher>(field: Field, price_fetcher: T) -> Self {
pub fn with_product_id_field<T: PriceFetcher>(field: String, price_fetcher: T) -> Self {
DynamicPriceColumn {
field,
price_cache: Default::default(),
Expand All @@ -48,7 +48,7 @@ impl Warmer for DynamicPriceColumn {
fn warm(&self, searcher: &Searcher) -> tantivy::Result<()> {
for segment in searcher.segment_readers() {
let key = (segment.segment_id(), segment.delete_opstamp());
let product_id_reader = segment.fast_fields().u64(self.field)?;
let product_id_reader = segment.fast_fields().u64(&self.field)?;
let product_ids: Vec<ProductId> = segment
.doc_ids_alive()
.map(|doc| product_id_reader.get_val(doc))
Expand Down Expand Up @@ -123,7 +123,7 @@ fn main() -> tantivy::Result<()> {

let price_table = ExternalPriceTable::default();
let price_dynamic_column = Arc::new(DynamicPriceColumn::with_product_id_field(
product_id,
"product_id".to_string(),
price_table.clone(),
));
price_table.update_price(OLIVE_OIL, 12);
Expand Down
14 changes: 4 additions & 10 deletions src/aggregation/agg_req_with_accessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,7 @@ impl BucketAggregationWithAccessor {
BucketAggregationType::Terms(TermsAggregation {
field: field_name, ..
}) => {
let field = reader
.schema()
.get_field(field_name)
.ok_or_else(|| TantivyError::FieldNotFound(field_name.to_string()))?;
let field = reader.schema().get_field(field_name)?;
inverted_index = Some(reader.inverted_index(field)?);
get_ff_reader_and_validate(reader, field_name, Cardinality::MultiValues)?
}
Expand Down Expand Up @@ -195,10 +192,7 @@ fn get_ff_reader_and_validate(
field_name: &str,
cardinality: Cardinality,
) -> crate::Result<(FastFieldAccessor, Type)> {
let field = reader
.schema()
.get_field(field_name)
.ok_or_else(|| TantivyError::FieldNotFound(field_name.to_string()))?;
let field = reader.schema().get_field(field_name)?;
let field_type = reader.schema().get_field_entry(field).field_type();

if let Some((_ff_type, field_cardinality)) = type_and_cardinality(field_type) {
Expand All @@ -218,10 +212,10 @@ fn get_ff_reader_and_validate(
let ff_fields = reader.fast_fields();
match cardinality {
Cardinality::SingleValue => ff_fields
.u64_lenient(field)
.u64_lenient(field_name)
.map(|field| (FastFieldAccessor::Single(field), field_type.value_type())),
Cardinality::MultiValues => ff_fields
.u64s_lenient(field)
.u64s_lenient(field_name)
.map(|field| (FastFieldAccessor::Multi(field), field_type.value_type())),
}
}
4 changes: 1 addition & 3 deletions src/aggregation/bucket/histogram/histogram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,9 +548,7 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
};

// If we have a date type on the histogram buckets, we add the `key_as_string` field as rfc339
let field = schema
.get_field(&histogram_req.field)
.ok_or_else(|| TantivyError::FieldNotFound(histogram_req.field.to_string()))?;
let field = schema.get_field(&histogram_req.field)?;
if schema.get_field_entry(field).field_type().is_date() {
for bucket in buckets.iter_mut() {
if let crate::aggregation::Key::F64(val) = bucket.key {
Expand Down
5 changes: 1 addition & 4 deletions src/aggregation/intermediate_agg_result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ use super::{format_date, Key, SerializedKey, VecWithNames};
use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry};
use crate::aggregation::bucket::TermsAggregationInternal;
use crate::schema::Schema;
use crate::TantivyError;

/// Contains the intermediate aggregation result, which is optimized to be merged with other
/// intermediate results.
Expand Down Expand Up @@ -658,9 +657,7 @@ impl IntermediateRangeBucketEntry {

// If we have a date type on the histogram buckets, we add the `key_as_string` field as
// rfc339
let field = schema
.get_field(&range_req.field)
.ok_or_else(|| TantivyError::FieldNotFound(range_req.field.to_string()))?;
let field = schema.get_field(&range_req.field)?;
if schema.get_field_entry(field).field_type().is_date() {
if let Some(val) = range_bucket_entry.to {
let key_as_string = format_date(val as i64)?;
Expand Down
2 changes: 1 addition & 1 deletion src/collector/filter_collector_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ where

let fast_field_reader = segment_reader
.fast_fields()
.typed_fast_field_reader(self.field)?;
.typed_fast_field_reader(schema.get_field_name(self.field))?;

let segment_collector = self
.collector
Expand Down
20 changes: 11 additions & 9 deletions src/collector/histogram_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use fastfield_codecs::Column;

use crate::collector::{Collector, SegmentCollector};
use crate::fastfield::FastValue;
use crate::schema::{Field, Type};
use crate::schema::Type;
use crate::{DocId, Score};

/// Histogram builds an histogram of the values of a fastfield for the
Expand All @@ -28,7 +28,7 @@ pub struct HistogramCollector {
min_value: u64,
num_buckets: usize,
divider: DividerU64,
field: Field,
field: String,
}

impl HistogramCollector {
Expand All @@ -46,7 +46,7 @@ impl HistogramCollector {
/// # Disclaimer
/// This function panics if the field given is of type f64.
pub fn new<TFastValue: FastValue>(
field: Field,
field: String,
min_value: TFastValue,
bucket_width: u64,
num_buckets: usize,
Expand Down Expand Up @@ -112,7 +112,7 @@ impl Collector for HistogramCollector {
_segment_local_id: crate::SegmentOrdinal,
segment: &crate::SegmentReader,
) -> crate::Result<Self::Child> {
let ff_reader = segment.fast_fields().u64_lenient(self.field)?;
let ff_reader = segment.fast_fields().u64_lenient(&self.field)?;
Ok(SegmentHistogramCollector {
histogram_computer: HistogramComputer {
counts: vec![0; self.num_buckets],
Expand Down Expand Up @@ -211,13 +211,13 @@ mod tests {
#[test]
fn test_no_segments() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let val_field = schema_builder.add_u64_field("val_field", FAST);
schema_builder.add_u64_field("val_field", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let reader = index.reader()?;
let searcher = reader.searcher();
let all_query = AllQuery;
let histogram_collector = HistogramCollector::new(val_field, 0u64, 2, 5);
let histogram_collector = HistogramCollector::new("val_field".to_string(), 0u64, 2, 5);
let histogram = searcher.search(&all_query, &histogram_collector)?;
assert_eq!(histogram, vec![0; 5]);
Ok(())
Expand All @@ -238,7 +238,8 @@ mod tests {
let reader = index.reader()?;
let searcher = reader.searcher();
let all_query = AllQuery;
let histogram_collector = HistogramCollector::new(val_field, -20i64, 10u64, 4);
let histogram_collector =
HistogramCollector::new("val_field".to_string(), -20i64, 10u64, 4);
let histogram = searcher.search(&all_query, &histogram_collector)?;
assert_eq!(histogram, vec![1, 1, 0, 1]);
Ok(())
Expand All @@ -262,7 +263,8 @@ mod tests {
let reader = index.reader()?;
let searcher = reader.searcher();
let all_query = AllQuery;
let histogram_collector = HistogramCollector::new(val_field, -20i64, 10u64, 4);
let histogram_collector =
HistogramCollector::new("val_field".to_string(), -20i64, 10u64, 4);
let histogram = searcher.search(&all_query, &histogram_collector)?;
assert_eq!(histogram, vec![1, 1, 0, 1]);
Ok(())
Expand All @@ -285,7 +287,7 @@ mod tests {
let searcher = reader.searcher();
let all_query = AllQuery;
let week_histogram_collector = HistogramCollector::new(
date_field,
"date_field".to_string(),
DateTime::from_primitive(
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
),
Expand Down
10 changes: 6 additions & 4 deletions src/collector/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ impl SegmentCollector for TestSegmentCollector {
///
/// This collector is mainly useful for tests.
pub struct FastFieldTestCollector {
field: Field,
field: String,
}

pub struct FastFieldSegmentCollector {
Expand All @@ -164,7 +164,7 @@ pub struct FastFieldSegmentCollector {
}

impl FastFieldTestCollector {
pub fn for_field(field: Field) -> FastFieldTestCollector {
pub fn for_field(field: String) -> FastFieldTestCollector {
FastFieldTestCollector { field }
}
}
Expand All @@ -180,7 +180,7 @@ impl Collector for FastFieldTestCollector {
) -> crate::Result<FastFieldSegmentCollector> {
let reader = segment_reader
.fast_fields()
.u64(self.field)
.u64(&self.field)
.expect("Requested field is not a fast field.");
Ok(FastFieldSegmentCollector {
vals: Vec::new(),
Expand Down Expand Up @@ -238,7 +238,9 @@ impl Collector for BytesFastFieldTestCollector {
_segment_local_id: u32,
segment_reader: &SegmentReader,
) -> crate::Result<BytesFastFieldSegmentCollector> {
let reader = segment_reader.fast_fields().bytes(self.field)?;
let reader = segment_reader
.fast_fields()
.bytes(segment_reader.schema().get_field_name(self.field))?;
Ok(BytesFastFieldSegmentCollector {
vals: Vec::new(),
reader,
Expand Down
8 changes: 4 additions & 4 deletions src/collector/top_score_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ impl CustomScorer<u64> for ScorerByField {
// The conversion will then happen only on the top-K docs.
let ff_reader = segment_reader
.fast_fields()
.typed_fast_field_reader(self.field)?;
.typed_fast_field_reader(segment_reader.schema().get_field_name(self.field))?;
Ok(ScorerByFastFieldReader { ff_reader })
}
}
Expand Down Expand Up @@ -454,7 +454,7 @@ impl TopDocs {
/// // In our case, we will get a reader for the popularity
/// // fast field.
/// let popularity_reader =
/// segment_reader.fast_fields().u64(popularity).unwrap();
/// segment_reader.fast_fields().u64("popularity").unwrap();
///
/// // We can now define our actual scoring function
/// move |doc: DocId, original_score: Score| {
Expand Down Expand Up @@ -561,9 +561,9 @@ impl TopDocs {
/// // Note that this is implemented by using a `(u64, u64)`
/// // as a score.
/// let popularity_reader =
/// segment_reader.fast_fields().u64(popularity).unwrap();
/// segment_reader.fast_fields().u64("popularity").unwrap();
/// let boosted_reader =
/// segment_reader.fast_fields().u64(boosted).unwrap();
/// segment_reader.fast_fields().u64("boosted").unwrap();
///
/// // We can now define our actual scoring function
/// move |doc: DocId| {
Expand Down
2 changes: 1 addition & 1 deletion src/core/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ impl IndexBuilder {
fn validate(&self) -> crate::Result<()> {
if let Some(schema) = self.schema.as_ref() {
if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref() {
let schema_field = schema.get_field(&sort_by_field.field).ok_or_else(|| {
let schema_field = schema.get_field(&sort_by_field.field).map_err(|_| {
TantivyError::InvalidArgument(format!(
"Field to sort index {} not found in schema",
sort_by_field.field
Expand Down
3 changes: 2 additions & 1 deletion src/core/segment_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ impl SegmentReader {

match field_entry.field_type() {
FieldType::Facet(_) => {
let term_ords_reader = self.fast_fields().u64s(field)?;
let term_ords_reader =
self.fast_fields().u64s(self.schema.get_field_name(field))?;
let termdict = self
.termdict_composite
.open_read(field)
Expand Down
5 changes: 2 additions & 3 deletions src/fastfield/bytes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ mod tests {
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let bytes_reader = segment_reader.fast_fields().bytes(bytes_field).unwrap();
let bytes_reader = segment_reader.fast_fields().bytes("bytesfield").unwrap();
assert_eq!(bytes_reader.get_bytes(0), &[0u8, 1, 2, 3]);
assert!(bytes_reader.get_bytes(1).is_empty());
assert_eq!(bytes_reader.get_bytes(2), &[255u8]);
Expand Down Expand Up @@ -109,8 +109,7 @@ mod tests {
let searcher = create_index_for_test(FAST)?;
assert_eq!(searcher.num_docs(), 1);
let fast_fields = searcher.segment_reader(0u32).fast_fields();
let field = searcher.schema().get_field("string_bytes").unwrap();
let fast_field_reader = fast_fields.bytes(field).unwrap();
let fast_field_reader = fast_fields.bytes("string_bytes").unwrap();
assert_eq!(fast_field_reader.get_bytes(0u32), b"tantivy");
Ok(())
}
Expand Down
Loading

0 comments on commit f687b3a

Please sign in to comment.