diff --git a/CHANGELOG.md b/CHANGELOG.md index 969ba677d..e0c976a98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ See [STATUS.md](server/STATUS.md) to learn more about which features will remain - Fix `pageSize` property in Collections not using persistence - Add Table Ontology #25 - Fix Post endpoints not including search params in returned `@id` field. +- Rebuilding indexes done on separate thread, only once #616 #615 +- Don't require building index for populate commands ## [v0.34.2] - 2023-03-04 diff --git a/lib/src/db.rs b/lib/src/db.rs index ef7172966..5b30f45f9 100644 --- a/lib/src/db.rs +++ b/lib/src/db.rs @@ -577,24 +577,7 @@ impl Storelike for Db { } fn populate(&self) -> AtomicResult<()> { - // populate_base_models should be run in init, instead of here, since it will result in infinite loops without - crate::populate::populate_default_store(self) - .map_err(|e| format!("Failed to populate default store. {}", e))?; - // This is a potentially expensive operation, but is needed to make Queries work with the models created in here - self.build_index(true) - .map_err(|e| format!("Failed to build index. {}", e))?; - crate::populate::create_drive(self) - .map_err(|e| format!("Failed to create drive. {}", e))?; - crate::populate::set_drive_rights(self, true)?; - crate::populate::populate_collections(self) - .map_err(|e| format!("Failed to populate collections. {}", e))?; - crate::populate::populate_endpoints(self) - .map_err(|e| format!("Failed to populate endpoints. {}", e))?; - crate::populate::populate_importer(self) - .map_err(|e| format!("Failed to populate importer. {}", e))?; - crate::populate::populate_sidebar_items(self) - .map_err(|e| format!("Failed to populate sidebar items. 
{}", e))?; - Ok(()) + crate::populate::populate_all(self) } #[instrument(skip(self))] diff --git a/lib/src/db/prop_val_sub_index.rs b/lib/src/db/prop_val_sub_index.rs index 05effdd3d..2a3fc318b 100644 --- a/lib/src/db/prop_val_sub_index.rs +++ b/lib/src/db/prop_val_sub_index.rs @@ -13,16 +13,10 @@ pub fn find_in_prop_val_sub_index(store: &Db, prop: &str, val: Option<&Value>) - prefix.extend(value.to_sortable_string().as_bytes()); prefix.extend([SEPARATION_BIT]); } - Box::new( - store - .prop_val_sub_index - .scan_prefix(prefix) - .into_iter() - .map(|kv| { - let (key, _value) = kv?; - key_to_index_atom(&key) - }), - ) + Box::new(store.prop_val_sub_index.scan_prefix(prefix).map(|kv| { + let (key, _value) = kv?; + key_to_index_atom(&key) + })) } #[instrument(skip(store))] diff --git a/lib/src/db/test.rs b/lib/src/db/test.rs index bee3ae55d..514c8cb38 100644 --- a/lib/src/db/test.rs +++ b/lib/src/db/test.rs @@ -62,7 +62,6 @@ fn populate_collections() { let store = Db::init_temp("populate_collections").unwrap(); let subjects: Vec = store .all_resources(false) - .into_iter() .map(|r| r.get_subject().into()) .collect(); println!("{:?}", subjects); @@ -472,7 +471,6 @@ fn test_collection_update_value(store: &Db, property_url: &str, old_val: Value, ); let mut resources: Vec = (0..count) - .into_iter() .map(|_num| { let mut demo_resource = Resource::new_generate_subject(store); demo_resource diff --git a/lib/src/db/val_prop_sub_index.rs b/lib/src/db/val_prop_sub_index.rs index a81ffe91c..2b46d50d1 100644 --- a/lib/src/db/val_prop_sub_index.rs +++ b/lib/src/db/val_prop_sub_index.rs @@ -49,16 +49,10 @@ pub fn find_in_val_prop_sub_index(store: &Db, val: &Value, prop: Option<&str>) - prefix.extend(prop.as_bytes()); prefix.extend([SEPARATION_BIT]); } - Box::new( - store - .reference_index - .scan_prefix(prefix) - .into_iter() - .map(|kv| { - let (key, _value) = kv?; - key_to_index_atom(&key) - }), - ) + Box::new(store.reference_index.scan_prefix(prefix).map(|kv| { + let (key, 
_value) = kv?; + key_to_index_atom(&key) + })) } /// Parses a Value index key string, converts it into an atom. diff --git a/lib/src/parse.rs b/lib/src/parse.rs index 57e4fe8a7..28bce721c 100644 --- a/lib/src/parse.rs +++ b/lib/src/parse.rs @@ -38,7 +38,7 @@ pub struct ParseOpts { pub overwrite_outside: bool, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum SaveOpts { /// Don't save the parsed resources to the store. /// No authorization checks will be performed. @@ -126,6 +126,15 @@ pub fn parse_json_ad_string( ), _other => return Err("Root JSON element must be an object or array.".into()), } + // For most save methods, we need to add the atoms to the index here. + // The `Commit` feature adds to index by itself, so we can skip that step here. + if parse_opts.save != SaveOpts::Commit { + for res in &vec { + for atom in res.to_atoms() { + store.add_atom_to_index(&atom, res)?; + } + } + } Ok(vec) } diff --git a/lib/src/populate.rs b/lib/src/populate.rs index 921f23a63..ebb137685 100644 --- a/lib/src/populate.rs +++ b/lib/src/populate.rs @@ -136,7 +136,7 @@ pub fn populate_base_models(store: &impl Storelike) -> AtomicResult<()> { urls::PARENT.into(), Value::AtomicUrl("https://atomicdata.dev/properties".into()), ); - store.add_resource_opts(&resource, false, false, true)?; + store.add_resource_opts(&resource, false, true, true)?; } for c in classes { @@ -145,7 +145,7 @@ pub fn populate_base_models(store: &impl Storelike) -> AtomicResult<()> { urls::PARENT.into(), Value::AtomicUrl("https://atomicdata.dev/classes".into()), ); - store.add_resource_opts(&resource, false, false, true)?; + store.add_resource_opts(&resource, false, true, true)?; } Ok(()) @@ -265,7 +265,6 @@ pub fn populate_importer(store: &crate::Db) -> AtomicResult<()> { Ok(()) } -#[cfg(feature = "db")] /// Adds items to the SideBar as subresources. /// Useful for helping a new user get started. 
pub fn populate_sidebar_items(store: &crate::Db) -> AtomicResult<()> { @@ -276,7 +275,25 @@ pub fn populate_sidebar_items(store: &crate::Db) -> AtomicResult<()> { format!("{}/import", base), format!("{}/collections", base), ]; - drive.set_propval(urls::SUBRESOURCES.into(), arr.into(), store)?; + for item in arr { + drive.push_propval(urls::SUBRESOURCES, item.into(), true)?; + } drive.save_locally(store)?; Ok(()) } + +/// Runs all populate commands. Does not build the index; run `build_index` separately if needed (which can be slow). +#[cfg(feature = "db")] +pub fn populate_all(store: &crate::Db) -> AtomicResult<()> { + // populate_base_models should be run in init, instead of here, since it will result in infinite loops without + populate_default_store(store) + .map_err(|e| format!("Failed to populate default store. {}", e))?; + create_drive(store).map_err(|e| format!("Failed to create drive. {}", e))?; + set_drive_rights(store, true)?; + populate_collections(store).map_err(|e| format!("Failed to populate collections. {}", e))?; + populate_endpoints(store).map_err(|e| format!("Failed to populate endpoints. {}", e))?; + populate_importer(store).map_err(|e| format!("Failed to populate importer. {}", e))?; + populate_sidebar_items(store) + .map_err(|e| format!("Failed to populate sidebar items. {}", e))?; + Ok(()) +} diff --git a/lib/src/storelike.rs b/lib/src/storelike.rs index 8c01f3f55..3b108b9cb 100644 --- a/lib/src/storelike.rs +++ b/lib/src/storelike.rs @@ -65,12 +65,14 @@ pub trait Storelike: Sized { /// Constructs the value index from all resources in the store. Could take a while. fn build_index(&self, include_external: bool) -> AtomicResult<()> { + tracing::info!("Building index (this could take a few minutes for larger databases)"); for r in self.all_resources(include_external) { for atom in r.to_atoms() { self.add_atom_to_index(&atom, &r) .map_err(|e| format!("Failed to add atom to index {}. 
{}", atom, e))?; } } + tracing::info!("Building index finished!"); Ok(()) } diff --git a/server/e2e_tests/e2e-generated.spec.ts b/server/e2e_tests/e2e-generated.spec.ts index 31e0da821..1f4206cbe 100644 --- a/server/e2e_tests/e2e-generated.spec.ts +++ b/server/e2e_tests/e2e-generated.spec.ts @@ -184,7 +184,7 @@ test.describe('data-browser', async () => { '[data-test="sort-https://atomicdata.dev/properties/description"]', ); // These values can change as new Properties are added to atomicdata.dev - const firstPageText = 'text=A base64'; + const firstPageText = "text=A base64 serialized JSON object"; const secondPageText = 'text=include-nested'; await expect(page.locator(firstPageText)).toBeVisible(); await page.click('[data-test="next-page"]'); diff --git a/server/src/appstate.rs b/server/src/appstate.rs index 31fbb987d..9da4eb9da 100644 --- a/server/src/appstate.rs +++ b/server/src/appstate.rs @@ -46,10 +46,6 @@ pub fn init(config: Config) -> AtomicServerResult { tracing::info!("Initialize: creating and populating new Database"); atomic_lib::populate::populate_default_store(&store) .map_err(|e| format!("Failed to populate default store. 
{}", e))?; - // Building the index here is needed to perform Queries on imported resources - tracing::info!("Building index (this could take a few minutes for larger databases)"); - store.build_index(true)?; - tracing::info!("Building index finished!"); } tracing::info!("Setting default agent"); @@ -81,7 +77,16 @@ pub fn init(config: Config) -> AtomicServerResult { tracing::info!( "Running initialization commands (first time startup, or you passed --initialize)" ); - store.populate()?; + + atomic_lib::populate::populate_all(&store)?; + // Building the index here is needed to perform Queries on imported resources + let store_clone = store.clone(); + std::thread::spawn(move || { + let res = store_clone.build_index(true); + if let Err(e) = res { + tracing::error!("Failed to build index: {}", e); + } + }); set_up_initial_invite(&store) .map_err(|e| format!("Error while setting up initial invite: {}", e))?; diff --git a/server/src/serve.rs b/server/src/serve.rs index 564e580f3..aeaf5cf53 100644 --- a/server/src/serve.rs +++ b/server/src/serve.rs @@ -8,7 +8,6 @@ fn rebuild_indexes(appstate: &crate::appstate::AppState) -> AtomicServerResult<( let appstate_clone = appstate.clone(); actix_web::rt::spawn(async move { - tracing::warn!("Building value index... 
This could take a while, expect worse performance until 'Building value index finished'"); appstate_clone .store .clear_index() @@ -17,7 +16,6 @@ fn rebuild_indexes(appstate: &crate::appstate::AppState) -> AtomicServerResult<( .store .build_index(true) .expect("Failed to build value index"); - tracing::info!("Building value index finished!"); }); tracing::info!("Removing existing search index..."); appstate_clone diff --git a/server/src/tests.rs b/server/src/tests.rs index 133b9c5c6..a8bc7f5c7 100644 --- a/server/src/tests.rs +++ b/server/src/tests.rs @@ -70,7 +70,7 @@ async fn server_tests() { let resp = test::call_service(&app, req.to_request()).await; let is_success = resp.status().is_success(); let body = get_body(resp); - println!("{:?}", body); + // println!("{:?}", body); assert!(is_success); assert!(body.as_str().contains("html")); @@ -78,7 +78,11 @@ async fn server_tests() { let req = test::TestRequest::with_uri("/properties").insert_header(("Accept", "application/ad+json")); let resp = test::call_service(&app, req.to_request()).await; - assert_eq!(resp.status().as_u16(), 200, "resource should be public"); + assert_eq!( + resp.status().as_u16(), + 200, + "properties collections should be found and public" + ); // Should 404 let req = test::TestRequest::with_uri("/doesnotexist") @@ -87,7 +91,7 @@ async fn server_tests() { let resp = test::call_service(&app, req).await; assert!(resp.status().is_client_error()); - // Edit the properties collection, make it hidden to the public agent + // Edit the main drive, make it hidden to the public agent let mut drive = store.get_resource(&appstate.config.server_url).unwrap(); drive .set_propval(