From e7aeabd564b6c983666bba9d1239ec1c2bdbf633 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Sun, 24 May 2020 22:57:02 +0200 Subject: [PATCH 01/11] ByTermRefs streaming modes do not depend on sophia_term anymore --- sophia/src/dataset/inmem/_gspo_wrapper.rs | 12 ++++++------ sophia/src/dataset/inmem/_hash_dataset.rs | 2 +- sophia/src/dataset/inmem/_ogps_wrapper.rs | 12 ++++++------ sophia/src/graph/inmem/_hash_graph.rs | 2 +- sophia/src/graph/inmem/_ops_wrapper.rs | 12 ++++++------ sophia/src/graph/inmem/_spo_wrapper.rs | 12 ++++++------ sophia/src/quad/streaming_mode.rs | 19 ++++++++++--------- sophia/src/triple/streaming_mode.rs | 15 ++++++++------- 8 files changed, 44 insertions(+), 42 deletions(-) diff --git a/sophia/src/dataset/inmem/_gspo_wrapper.rs b/sophia/src/dataset/inmem/_gspo_wrapper.rs index 2ea0bbf9..377e097e 100644 --- a/sophia/src/dataset/inmem/_gspo_wrapper.rs +++ b/sophia/src/dataset/inmem/_gspo_wrapper.rs @@ -41,7 +41,7 @@ where impl DatasetWrapper for GspoWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { type Wrapped = T; @@ -192,35 +192,35 @@ where impl Dataset for GspoWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_dataset_for_wrapper!(); } impl IndexedDataset for GspoWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_indexed_dataset_for_wrapper!(); } impl CollectibleDataset for GspoWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_collectible_dataset_for_indexed_dataset!(); } impl MutableDataset for GspoWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_mutable_dataset_for_indexed_dataset!(); } impl SetDataset for GspoWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, T: SetDataset, { } diff --git 
a/sophia/src/dataset/inmem/_hash_dataset.rs b/sophia/src/dataset/inmem/_hash_dataset.rs index ceeec55d..42631120 100644 --- a/sophia/src/dataset/inmem/_hash_dataset.rs +++ b/sophia/src/dataset/inmem/_hash_dataset.rs @@ -174,7 +174,7 @@ where ::TermData: 'static, { #[allow(clippy::type_complexity)] - type Quad = ByTermRefs<::TermData>; + type Quad = ByTermRefs::TermData>>; type Error = Infallible; fn quads(&self) -> DQuadSource { diff --git a/sophia/src/dataset/inmem/_ogps_wrapper.rs b/sophia/src/dataset/inmem/_ogps_wrapper.rs index 8a138861..0ac0a2f1 100644 --- a/sophia/src/dataset/inmem/_ogps_wrapper.rs +++ b/sophia/src/dataset/inmem/_ogps_wrapper.rs @@ -40,7 +40,7 @@ where impl DatasetWrapper for OgpsWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { type Wrapped = T; @@ -191,35 +191,35 @@ where impl Dataset for OgpsWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_dataset_for_wrapper!(); } impl IndexedDataset for OgpsWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_indexed_dataset_for_wrapper!(); } impl CollectibleDataset for OgpsWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_collectible_dataset_for_indexed_dataset!(); } impl MutableDataset for OgpsWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, { impl_mutable_dataset_for_indexed_dataset!(); } impl SetDataset for OgpsWrapper where - T: IndexedDataset + Dataset::TermData>>, + T: IndexedDataset + Dataset::TermData>>>, T: IndexedDataset + SetDataset, { } diff --git a/sophia/src/graph/inmem/_hash_graph.rs b/sophia/src/graph/inmem/_hash_graph.rs index 63362ae9..49c44324 100644 --- a/sophia/src/graph/inmem/_hash_graph.rs +++ b/sophia/src/graph/inmem/_hash_graph.rs @@ -137,7 +137,7 @@ where I::Index: Hash, ::TermData: 'static, { - type 
Triple = ByTermRefs<::TermData>; + type Triple = ByTermRefs::TermData>>; type Error = Infallible; fn triples(&self) -> GTripleSource { diff --git a/sophia/src/graph/inmem/_ops_wrapper.rs b/sophia/src/graph/inmem/_ops_wrapper.rs index bf05121c..86900532 100644 --- a/sophia/src/graph/inmem/_ops_wrapper.rs +++ b/sophia/src/graph/inmem/_ops_wrapper.rs @@ -38,7 +38,7 @@ where impl GraphWrapper for OpsWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { type Wrapped = T; @@ -144,35 +144,35 @@ where impl Graph for OpsWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_graph_for_wrapper!(); } impl IndexedGraph for OpsWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_indexed_graph_for_wrapper!(); } impl CollectibleGraph for OpsWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_collectible_graph_for_indexed_graph!(); } impl MutableGraph for OpsWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_mutable_graph_for_indexed_graph!(); } impl SetGraph for OpsWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, T: SetGraph, { } diff --git a/sophia/src/graph/inmem/_spo_wrapper.rs b/sophia/src/graph/inmem/_spo_wrapper.rs index 1ffbdda5..f002ad9d 100644 --- a/sophia/src/graph/inmem/_spo_wrapper.rs +++ b/sophia/src/graph/inmem/_spo_wrapper.rs @@ -38,7 +38,7 @@ where impl GraphWrapper for SpoWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { type Wrapped = T; @@ -144,35 +144,35 @@ where impl Graph for SpoWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_graph_for_wrapper!(); } impl IndexedGraph for SpoWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { 
impl_indexed_graph_for_wrapper!(); } impl CollectibleGraph for SpoWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_collectible_graph_for_indexed_graph!(); } impl MutableGraph for SpoWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, { impl_mutable_graph_for_indexed_graph!(); } impl SetGraph for SpoWrapper where - T: IndexedGraph + Graph::TermData>>, + T: IndexedGraph + Graph::TermData>>>, T: SetGraph, { } diff --git a/sophia/src/quad/streaming_mode.rs b/sophia/src/quad/streaming_mode.rs index 03b870ce..cfb6e12f 100644 --- a/sophia/src/quad/streaming_mode.rs +++ b/sophia/src/quad/streaming_mode.rs @@ -7,7 +7,8 @@ use std::marker::PhantomData; use std::ptr::NonNull; use crate::quad::Quad; -use sophia_term::{RefTerm, Term, TermData}; +use sophia_api::term::TTerm; +use sophia_term::RefTerm; mod _unsafe_quad; pub(crate) use _unsafe_quad::*; @@ -36,10 +37,10 @@ impl QuadStreamingMode for ByRefTerms { } /// See [module](./index.html) documentation. #[derive(Debug)] -pub struct ByTermRefs(PhantomData); -impl QuadStreamingMode for ByTermRefs { +pub struct ByTermRefs(PhantomData<*const T>); +impl QuadStreamingMode for ByTermRefs { #[allow(clippy::type_complexity)] - type UnsafeQuad = TermRefs<([NonNull>; 3], Option>>)>; + type UnsafeQuad = TermRefs<([NonNull; 3], Option>)>; } /// See [module](./index.html) documentation. @@ -107,13 +108,13 @@ impl<'a> StreamedQuad<'a, ByRefTerms> { } impl<'a, T> StreamedQuad<'a, ByTermRefs> where - T: TermData, + T: TTerm + ?Sized, { pub fn by_term_refs( - s: &'a Term, - p: &'a Term, - o: &'a Term, - g: Option<&'a Term>, + s: &'a T, + p: &'a T, + o: &'a T, + g: Option<&'a T>, ) -> Self { StreamedQuad { _phantom: PhantomData, diff --git a/sophia/src/triple/streaming_mode.rs b/sophia/src/triple/streaming_mode.rs index 2ea9fb29..9f646db4 100644 --- a/sophia/src/triple/streaming_mode.rs +++ b/sophia/src/triple/streaming_mode.rs @@ -40,7 +40,7 @@ //! 
it is constructed with [`StreamedTriple::by_ref`]; //! * [`ByRefTerms`]: [`StreamedTriple<'a>`] will wrap an array of 3 [`Term<&'a str>`]; //! it is constructed with [`StreamedTriple::by_ref_terms`]. -//! * [`ByTermRefs`]: [`StreamedTriple<'a>`] will wrap an array of 3 [`Term`] references, +//! * [`ByTermRefs`]: [`StreamedTriple<'a>`] will wrap an array of 3 [`&'a T`] references, //! valid as long as `'a`; //! it is constructed with [`StreamedTriple::by_term_refs`]. //! @@ -69,7 +69,8 @@ use std::marker::PhantomData; use std::ptr::NonNull; use crate::triple::Triple; -use sophia_term::{RefTerm, Term, TermData}; +use sophia_api::term::TTerm; +use sophia_term::RefTerm; mod _unsafe_triple; pub(crate) use _unsafe_triple::*; @@ -98,9 +99,9 @@ impl TripleStreamingMode for ByRefTerms { } /// See [module](./index.html) documentation. #[derive(Debug)] -pub struct ByTermRefs(PhantomData); -impl TripleStreamingMode for ByTermRefs { - type UnsafeTriple = TermRefs<[NonNull>; 3]>; +pub struct ByTermRefs(PhantomData<*const T>); +impl TripleStreamingMode for ByTermRefs { + type UnsafeTriple = TermRefs<[NonNull; 3]>; } /// See [module](./index.html) documentation. @@ -162,9 +163,9 @@ impl<'a> StreamedTriple<'a, ByRefTerms> { } impl<'a, T> StreamedTriple<'a, ByTermRefs> where - T: TermData, + T: TTerm + ?Sized, { - pub fn by_term_refs(s: &'a Term, p: &'a Term, o: &'a Term) -> Self { + pub fn by_term_refs(s: &'a T, p: &'a T, o: &'a T) -> Self { StreamedTriple { _phantom: PhantomData, wrapped: TermRefs([s.into(), p.into(), o.into()]), From ca600390b0384a4e81b61b6e3b3375879040d0b5 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Thu, 28 May 2020 19:39:17 +0200 Subject: [PATCH 02/11] remove sophia_term dependencies in triple & quad modules they were replaced by two macros (make_scoped_triple_streaming_mode, make_scoped_quad_streaming_mode), that can be used to build safe streaming modes around specific implementations of Triple/Quad (as used in rio_common). 
--- sophia/src/parser/rio_common.rs | 41 ++++++++++++------- sophia/src/quad.rs | 1 - sophia/src/quad/stream/test.rs | 2 +- sophia/src/quad/streaming_mode.rs | 63 +++++++++++++++-------------- sophia/src/triple/streaming_mode.rs | 60 +++++++++++++++++---------- 5 files changed, 99 insertions(+), 68 deletions(-) diff --git a/sophia/src/parser/rio_common.rs b/sophia/src/parser/rio_common.rs index b3e86e06..856f2803 100644 --- a/sophia/src/parser/rio_common.rs +++ b/sophia/src/parser/rio_common.rs @@ -65,13 +65,17 @@ where } } +pub type RioSourceTriple<'a> = [RefTerm<'a>; 3]; +crate::make_scoped_triple_streming_mode!(ScopedRioSourceTriple, RioSourceTriple); + impl TripleSource for StrictRioSource where T: TriplesParser, E: Error + 'static, { type Error = E; - type Triple = crate::triple::streaming_mode::ByRefTerms; + //type Triple = crate::triple::streaming_mode::ByValue>; + type Triple = ScopedRioSourceTriple; fn try_for_some_triple(&mut self, f: &mut F) -> StreamResult where @@ -86,11 +90,11 @@ where } parser .parse_step(&mut |t| -> StdResult<(), MyStreamError> { - f(StreamedTriple::by_ref_terms( + f(StreamedTriple::scoped([ rio2refterm(t.subject.into()), rio2refterm(t.predicate.into()), rio2refterm(t.object.into()), - )) + ])) .map_err(MyStreamError::from_sink_error) }) .map_err(|e| e.into_stream_error()) @@ -100,13 +104,16 @@ where } } +pub type RioSourceQuad<'a> = ([RefTerm<'a>; 3], Option>); +crate::make_scoped_quad_streming_mode!(ScopedRioSourceQuad, RioSourceQuad); + impl QuadSource for StrictRioSource where T: QuadsParser, E: Error + 'static, { type Error = E; - type Quad = crate::quad::streaming_mode::ByRefTerms; + type Quad = ScopedRioSourceQuad; fn try_for_some_quad(&mut self, f: &mut F) -> StreamResult where @@ -121,12 +128,14 @@ where } parser .parse_step(&mut |q| -> StdResult<(), MyStreamError> { - f(StreamedQuad::by_ref_terms( - rio2refterm(q.subject.into()), - rio2refterm(q.predicate.into()), - rio2refterm(q.object.into()), + f(StreamedQuad::scoped(( 
+ [ + rio2refterm(q.subject.into()), + rio2refterm(q.predicate.into()), + rio2refterm(q.object.into()), + ], q.graph_name.map(|g| rio2refterm(g.into())), - )) + ))) .map_err(MyStreamError::from_sink_error) }) .map_err(|e| e.into_stream_error()) @@ -157,7 +166,7 @@ where E: Error + 'static, { type Error = E; - type Quad = crate::quad::streaming_mode::ByRefTerms; + type Quad = ScopedRioSourceQuad; fn try_for_some_quad(&mut self, f: &mut F) -> StreamResult where @@ -172,12 +181,14 @@ where } parser .parse_step(&mut |q| -> StdResult<(), MyStreamError> { - f(StreamedQuad::by_ref_terms( - rio2refterm(q.subject), - rio2refterm(q.predicate), - rio2refterm(q.object), + f(StreamedQuad::scoped(( + [ + rio2refterm(q.subject), + rio2refterm(q.predicate), + rio2refterm(q.object), + ], q.graph_name.map(rio2refterm), - )) + ))) .map_err(MyStreamError::from_sink_error) }) .map_err(|e| e.into_stream_error()) diff --git a/sophia/src/quad.rs b/sophia/src/quad.rs index 90bde140..a6a75402 100644 --- a/sophia/src/quad.rs +++ b/sophia/src/quad.rs @@ -6,7 +6,6 @@ use crate::triple::*; use sophia_api::term::TTerm; -use sophia_term::*; pub mod stream; pub mod streaming_mode; diff --git a/sophia/src/quad/stream/test.rs b/sophia/src/quad/stream/test.rs index 9c0ff69e..4e120b30 100644 --- a/sophia/src/quad/stream/test.rs +++ b/sophia/src/quad/stream/test.rs @@ -4,7 +4,7 @@ use crate::quad::Quad; use crate::triple::stream::TripleSource; use lazy_static::lazy_static; use sophia_api::ns::{rdf, xsd}; -use sophia_term::BoxTerm; +use sophia_term::{BoxTerm, StaticTerm}; pub const NS: &'static str = "http://example.org/"; lazy_static! 
{ diff --git a/sophia/src/quad/streaming_mode.rs b/sophia/src/quad/streaming_mode.rs index cfb6e12f..63481d6c 100644 --- a/sophia/src/quad/streaming_mode.rs +++ b/sophia/src/quad/streaming_mode.rs @@ -8,7 +8,6 @@ use std::ptr::NonNull; use crate::quad::Quad; use sophia_api::term::TTerm; -use sophia_term::RefTerm; mod _unsafe_quad; pub(crate) use _unsafe_quad::*; @@ -31,12 +30,6 @@ impl QuadStreamingMode for ByRef { } /// See [module](./index.html) documentation. #[derive(Debug)] -pub struct ByRefTerms {} -impl QuadStreamingMode for ByRefTerms { - type UnsafeQuad = ([RefTerm<'static>; 3], Option>); -} -/// See [module](./index.html) documentation. -#[derive(Debug)] pub struct ByTermRefs(PhantomData<*const T>); impl QuadStreamingMode for ByTermRefs { #[allow(clippy::type_complexity)] @@ -89,33 +82,11 @@ where } } } -impl<'a> StreamedQuad<'a, ByRefTerms> { - pub fn by_ref_terms( - s: RefTerm<'a>, - p: RefTerm<'a>, - o: RefTerm<'a>, - g: Option>, - ) -> Self { - let s = unsafe { std::mem::transmute(s) }; - let p = unsafe { std::mem::transmute(p) }; - let o = unsafe { std::mem::transmute(o) }; - let g = unsafe { std::mem::transmute(g) }; - StreamedQuad { - _phantom: PhantomData, - wrapped: ([s, p, o], g), - } - } -} impl<'a, T> StreamedQuad<'a, ByTermRefs> where T: TTerm + ?Sized, { - pub fn by_term_refs( - s: &'a T, - p: &'a T, - o: &'a T, - g: Option<&'a T>, - ) -> Self { + pub fn by_term_refs(s: &'a T, p: &'a T, o: &'a T, g: Option<&'a T>) -> Self { StreamedQuad { _phantom: PhantomData, wrapped: TermRefs(([s.into(), p.into(), o.into()], g.map(|g| g.into()))), @@ -141,6 +112,38 @@ where } } +/// A macro for creating a [streaming mode] for lifetime-parameterized Quad types. +/// +/// This macro expects two identifiers: +/// * the first one (`$mode`) will be the identifier of the streaming mode; +/// * the second one (`$tt`) is the name of a generic type implementing [`Quad`], +/// and expecting a single lifetime parameter. 
+/// +/// It declares the streaming mode type `$mode`, +/// and add an associated function named`scoped` to `StreamedQuad<'a, $mode>`, +/// to convert an instance of `$tt<'a>` to a streamed quad. +/// +/// [streaming mode]: triple/streaming_mode/index.html +/// [`Quad`]: quad/trait.Quad.html +#[macro_export] +macro_rules! make_scoped_quad_streming_mode { + ($mode: ident, $qt: ident) => { + #[derive(Debug)] + pub struct $mode(std::marker::PhantomData<$qt<'static>>); + impl $crate::quad::streaming_mode::QuadStreamingMode for $mode { + type UnsafeQuad = $qt<'static>; + } + + impl<'a> $crate::quad::streaming_mode::StreamedQuad<'a, $mode> { + pub fn scoped(quad: $qt<'a>) -> Self { + unsafe { + $crate::quad::streaming_mode::StreamedQuad::wrap(std::mem::transmute(quad)) + } + } + } + }; +} + // adapter pub(crate) use crate::triple::streaming_mode::FromTriple; diff --git a/sophia/src/triple/streaming_mode.rs b/sophia/src/triple/streaming_mode.rs index 9f646db4..68f86ee0 100644 --- a/sophia/src/triple/streaming_mode.rs +++ b/sophia/src/triple/streaming_mode.rs @@ -38,18 +38,19 @@ //! it is constructed with [`StreamedTriple::by_value`]; //! * [`ByRef`]: [`StreamedTriple<'a>`] will wrap a reference to `T`, valid as long as `'a`; //! it is constructed with [`StreamedTriple::by_ref`]; -//! * [`ByRefTerms`]: [`StreamedTriple<'a>`] will wrap an array of 3 [`Term<&'a str>`]; -//! it is constructed with [`StreamedTriple::by_ref_terms`]. //! * [`ByTermRefs`]: [`StreamedTriple<'a>`] will wrap an array of 3 [`&'a T`] references, //! valid as long as `'a`; //! it is constructed with [`StreamedTriple::by_term_refs`]. //! +//! In addition, the macro [`make_scoped_triple_streaming_mode`] +//! allows to create a streaming mode for any lifetime-parameterized type implementing +//! [`Triple`] (see its documentation for more details). +//! //! NB: actually, another mode exists, //! but is specifically designed for the [`graph::adapter`](../../graph/adapter/index.html) module, //! 
should never be needed in other contexts. //! //! [`ByRef`]: struct.ByRef.html -//! [`ByRefTerms`]: struct.ByRefTerms.html //! [`ByTermRefs`]: struct.ByTermRefs.html //! [`ByValue`]: struct.ByValue.html //! [Generic Associated Types]: https://github.com/rust-lang/rust/issues/44265 @@ -64,13 +65,13 @@ //! [`Triple`]: ../trait.Triple.html //! [`triples`]: ../../graph/trait.Graph.html#tymethod.triples //! [`TripleStreamingMode`]: trait.TripleStreamingMode.html +//! [`make_scoped_triple_streaming_mode`]: ../../macro.make_scoped_triple_streming_mode.html use std::marker::PhantomData; use std::ptr::NonNull; use crate::triple::Triple; use sophia_api::term::TTerm; -use sophia_term::RefTerm; mod _unsafe_triple; pub(crate) use _unsafe_triple::*; @@ -93,12 +94,6 @@ impl TripleStreamingMode for ByRef { } /// See [module](./index.html) documentation. #[derive(Debug)] -pub struct ByRefTerms {} -impl TripleStreamingMode for ByRefTerms { - type UnsafeTriple = [RefTerm<'static>; 3]; -} -/// See [module](./index.html) documentation. -#[derive(Debug)] pub struct ByTermRefs(PhantomData<*const T>); impl TripleStreamingMode for ByTermRefs { type UnsafeTriple = TermRefs<[NonNull; 3]>; @@ -150,17 +145,6 @@ where } } } -impl<'a> StreamedTriple<'a, ByRefTerms> { - pub fn by_ref_terms(s: RefTerm<'a>, p: RefTerm<'a>, o: RefTerm<'a>) -> Self { - let s = unsafe { std::mem::transmute(s) }; - let p = unsafe { std::mem::transmute(p) }; - let o = unsafe { std::mem::transmute(o) }; - StreamedTriple { - _phantom: PhantomData, - wrapped: [s, p, o], - } - } -} impl<'a, T> StreamedTriple<'a, ByTermRefs> where T: TTerm + ?Sized, @@ -188,6 +172,40 @@ where } } +/// A macro for creating a [streaming mode] for lifetime-parameterized Triple types. +/// +/// This macro expects two identifiers: +/// * the first one (`$mode`) will be the identifier of the streaming mode; +/// * the second one (`$tt`) is the name of a generic type implementing [`Triple`], +/// and expecting a single lifetime parameter. 
+/// +/// It declares the streaming mode type `$mode`, +/// and add an associated function named`scoped` to `StreamedTriple<'a, $mode>`, +/// to convert an instance of `$tt<'a>` to a streamed triple. +/// +/// [streaming mode]: triple/streaming_mode/index.html +/// [`Triple`]: triple/trait.Triple.html +#[macro_export] +macro_rules! make_scoped_triple_streming_mode { + ($mode: ident, $tt: ident) => { + #[derive(Debug)] + pub struct $mode(std::marker::PhantomData<$tt<'static>>); + impl $crate::triple::streaming_mode::TripleStreamingMode for $mode { + type UnsafeTriple = $tt<'static>; + } + + impl<'a> $crate::triple::streaming_mode::StreamedTriple<'a, $mode> { + pub fn scoped(triple: $tt<'a>) -> Self { + unsafe { + $crate::triple::streaming_mode::StreamedTriple::wrap(std::mem::transmute( + triple, + )) + } + } + } + }; +} + // adapter pub(crate) use crate::quad::streaming_mode::FromQuad; From cc6fd95b20baba4bd5bc00d18e0f6dd0419b56ca Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Fri, 29 May 2020 08:26:02 +0200 Subject: [PATCH 03/11] remove sophia_term dependencies in _isomorphism modules this required introducing AnyOrExactlyRef, a TermMatcher similar to AnyOrExactly, but meant to borrow its term instead of owning it. It was not possible to extend AnyOrExactly like this impl<'a, T: TTerm> TermMatcher for AnyOrExactly<&'a T> because this would have been in conflict with the current impl: impl TermMatcher for AnyOrExactly Indeed, `&'a T` could implement TTerm for some type T... 
--- api/src/term/_graph_name_matcher.rs | 57 ++++++++++++++++++++++- api/src/term/matcher.rs | 72 ++++++++++++++++++++++++++++- sophia/src/dataset/_isomorphism.rs | 11 ++--- sophia/src/graph/_isomorphism.rs | 9 ++-- 4 files changed, 136 insertions(+), 13 deletions(-) diff --git a/api/src/term/_graph_name_matcher.rs b/api/src/term/_graph_name_matcher.rs index 35bb4412..1d6dad68 100644 --- a/api/src/term/_graph_name_matcher.rs +++ b/api/src/term/_graph_name_matcher.rs @@ -1,6 +1,6 @@ // this module is transparently re-exported by its sibling `matcher` -use crate::term::matcher::{AnyOrExactly, AnyTerm}; +use crate::term::matcher::{AnyOrExactly, AnyOrExactlyRef, AnyTerm}; use crate::term::*; /// Generic trait for matching graph names, *i.e.* optional [term]s. @@ -63,6 +63,28 @@ where } } +impl<'a, U> GraphNameMatcher for AnyOrExactlyRef> +where + U: TTerm + ?Sized, +{ + type Term = U; + fn constant(&self) -> Option> { + match self { + AnyOrExactlyRef::Any => None, + AnyOrExactlyRef::Exactly(g) => Some(*g), + } + } + fn matches(&self, g: Option<&T>) -> bool + where + T: TTerm + ?Sized, + { + match self { + AnyOrExactlyRef::Any => true, + AnyOrExactlyRef::Exactly(gself) => same_graph_name(*gself, g), + } + } +} + impl GraphNameMatcher for Option<&U> where U: TTerm + ?Sized, @@ -228,6 +250,39 @@ mod test { assert!(!m.matches(n2.as_ref())); } + #[test] + fn test_aoer_any_as_matcher() { + let m = AnyOrExactlyRef::>::Any; + // comparing to a term using a differently cut, + // to make the test less obvious + let n0: Option = None; + let n1 = Some(SimpleIri::new("http://champin.net/#", Some("pa")).unwrap()); + let n2 = Some(SimpleIri::new("http://example.org/", None).unwrap()); + + let mc = GraphNameMatcher::constant(&m); + assert!(mc.is_none()); + assert!(m.matches(n0.as_ref())); + assert!(m.matches(n1.as_ref())); + assert!(m.matches(n2.as_ref())); + } + + #[test] + fn test_aoer_explicit_as_matcher() { + let t = SimpleIri::new("http://champin.net/#pa", None).unwrap(); + let m 
= AnyOrExactlyRef::Exactly(Some(&t)); + // comparing to a term using a differently cut, + // to make the test less obvious + let n0: Option = None; + let n1 = Some(SimpleIri::new("http://champin.net/#", Some("pa")).unwrap()); + let n2 = Some(SimpleIri::new("http://example.org/", None).unwrap()); + + let mc = GraphNameMatcher::constant(&m); + assert!(mc.is_some()); + assert!(same_graph_name(mc.unwrap(), n1.as_ref())); + assert!(!m.matches(n0.as_ref())); + assert!(m.matches(n1.as_ref())); + assert!(!m.matches(n2.as_ref())); + } #[test] fn test_option_as_matcher() { let g = SimpleIri::new("http://champin.net/#pa", None).unwrap(); diff --git a/api/src/term/matcher.rs b/api/src/term/matcher.rs index 0fba9928..021f4df4 100644 --- a/api/src/term/matcher.rs +++ b/api/src/term/matcher.rs @@ -55,7 +55,7 @@ impl TermMatcher for AnyTerm { } } -/// A matcher matching either any term, or only a specific one. +/// A matcher matching either any term, or only a specific owned term. pub enum AnyOrExactly { /// Match any term. Any, @@ -97,6 +97,48 @@ where } } +/// A matcher matching either any term, or only a specific borrowed term. +pub enum AnyOrExactlyRef { + /// Match any term. + Any, + /// Match only this term. 
+ Exactly(T), +} + +impl From> for AnyOrExactlyRef +where + T: Sized, +{ + fn from(other: Option) -> AnyOrExactlyRef { + match other { + None => AnyOrExactlyRef::Any, + Some(t) => AnyOrExactlyRef::Exactly(t), + } + } +} + +impl<'a, U> TermMatcher for AnyOrExactlyRef<&'a U> +where + U: TTerm + ?Sized, +{ + type Term = U; + fn constant(&self) -> Option<&U> { + match self { + AnyOrExactlyRef::Any => None, + AnyOrExactlyRef::Exactly(t) => Some(t), + } + } + fn matches(&self, t: &T) -> bool + where + T: TTerm + ?Sized, + { + match self { + AnyOrExactlyRef::Any => true, + AnyOrExactlyRef::Exactly(tself) => term_eq(*tself, t), + } + } +} + impl TermMatcher for U where U: TTerm + ?Sized, @@ -249,6 +291,34 @@ mod test { assert!(!TermMatcher::matches(&m, &t2)); } + #[test] + fn test_aoer_any_as_matcher() { + let m = AnyOrExactlyRef::<&SimpleIri>::Any; + // comparing to a term using a differently cut, + // to make the test less obvious + let t1 = SimpleIri::new("http://champin.net/#", Some("pa")).unwrap(); + + let mc = TermMatcher::constant(&m); + assert!(mc.is_none()); + assert!(TermMatcher::matches(&m, &t1)); + } + + #[test] + fn test_aoer_exactly_as_matcher() { + let t = SimpleIri::new("http://champin.net/#pa", None).unwrap(); + let m = AnyOrExactlyRef::Exactly(&t); + // comparing to a term using a difSimferent term data, and differently cut, + // to make the test less obvious + let t1 = SimpleIri::new("http://champin.net/#", Some("pa")).unwrap(); + let t2 = SimpleIri::new("http://example.org/", None).unwrap(); + + let mc = TermMatcher::constant(&m); + assert!(mc.is_some()); + assert_eq!(mc.unwrap(), &t1); + assert!(TermMatcher::matches(&m, &t1)); + assert!(!TermMatcher::matches(&m, &t2)); + } + #[test] fn test_term_as_matcher() { let m = SimpleIri::new("http://champin.net/#pa", None).unwrap(); diff --git a/sophia/src/dataset/_isomorphism.rs b/sophia/src/dataset/_isomorphism.rs index 8ecce5e8..7ab79d27 100644 --- a/sophia/src/dataset/_isomorphism.rs +++ 
b/sophia/src/dataset/_isomorphism.rs @@ -6,12 +6,11 @@ use crate::dataset::{DQuad, DTerm, Dataset}; use crate::graph::{bn_mapper, hash_if_not_bn, match_ignore_bns}; use crate::quad::Quad; +use crate::term::matcher::AnyOrExactlyRef; use crate::triple::stream::{ SinkError, SinkResult as _, SourceError, SourceResult as _, StreamError, StreamResult, }; -use sophia_api::term::matcher::AnyOrExactly; use sophia_api::term::{TTerm, TermKind}; -use sophia_term::RefTerm; use std::collections::{BTreeSet, HashMap}; use std::error::Error; use std::fmt; @@ -182,14 +181,14 @@ where Ok(true) } -fn match_gname_ignore_bns(t: Option<&T>) -> AnyOrExactly> +fn match_gname_ignore_bns(g: Option<&T>) -> AnyOrExactlyRef> where T: TTerm + ?Sized, { - if t.map(TTerm::kind) == Some(TermKind::BlankNode) { - AnyOrExactly::Any + if g.map(TTerm::kind) == Some(TermKind::BlankNode) { + AnyOrExactlyRef::Any } else { - AnyOrExactly::Exactly(t.map(RefTerm::from)) + AnyOrExactlyRef::Exactly(g) } } diff --git a/sophia/src/graph/_isomorphism.rs b/sophia/src/graph/_isomorphism.rs index 2694c7de..10e74120 100644 --- a/sophia/src/graph/_isomorphism.rs +++ b/sophia/src/graph/_isomorphism.rs @@ -8,9 +8,8 @@ use crate::triple::stream::{ SinkError, SinkResult as _, SourceError, SourceResult as _, StreamError, StreamResult, }; use crate::triple::Triple; -use sophia_api::term::matcher::AnyOrExactly; +use sophia_api::term::matcher::AnyOrExactlyRef; use sophia_api::term::{term_hash, TTerm, TermKind}; -use sophia_term::RefTerm; use std::collections::{BTreeSet, HashMap}; use std::error::Error; use std::fmt; @@ -184,14 +183,14 @@ where Ok(true) } -pub(crate) fn match_ignore_bns(t: &T) -> AnyOrExactly +pub(crate) fn match_ignore_bns(t: &T) -> AnyOrExactlyRef<&T> where T: TTerm + ?Sized, { if t.kind() == TermKind::BlankNode { - AnyOrExactly::Any + AnyOrExactlyRef::Any } else { - AnyOrExactly::Exactly(RefTerm::from(t)) + AnyOrExactlyRef::Exactly(t) } } From 6a27b09771f75e16873e36b2c570f5bef13bc864 Mon Sep 17 00:00:00 2001 
From: Pierre-Antoine Champin Date: Wed, 17 Jun 2020 11:55:23 +0200 Subject: [PATCH 04/11] improve macros make_scoped_triple/quad_streaming_mode It was previously relying on adding an inherent impl block, which could not be done from another crate. This has been solved by introducing new utility traits (ScopedTripleMode / ScopedQuadMode). Also, a typo in the macro name has been fixed. --- sophia/src/parser/rio_common.rs | 4 ++-- sophia/src/quad/streaming_mode.rs | 25 ++++++++++++++++++++++--- sophia/src/triple/streaming_mode.rs | 29 +++++++++++++++++++++++++---- 3 files changed, 49 insertions(+), 9 deletions(-) diff --git a/sophia/src/parser/rio_common.rs b/sophia/src/parser/rio_common.rs index 856f2803..3ba2b132 100644 --- a/sophia/src/parser/rio_common.rs +++ b/sophia/src/parser/rio_common.rs @@ -66,7 +66,7 @@ where } pub type RioSourceTriple<'a> = [RefTerm<'a>; 3]; -crate::make_scoped_triple_streming_mode!(ScopedRioSourceTriple, RioSourceTriple); +crate::make_scoped_triple_streaming_mode!(ScopedRioSourceTriple, RioSourceTriple); impl TripleSource for StrictRioSource where @@ -105,7 +105,7 @@ where } pub type RioSourceQuad<'a> = ([RefTerm<'a>; 3], Option>); -crate::make_scoped_quad_streming_mode!(ScopedRioSourceQuad, RioSourceQuad); +crate::make_scoped_quad_streaming_mode!(ScopedRioSourceQuad, RioSourceQuad); impl QuadSource for StrictRioSource where diff --git a/sophia/src/quad/streaming_mode.rs b/sophia/src/quad/streaming_mode.rs index 63481d6c..daa881fc 100644 --- a/sophia/src/quad/streaming_mode.rs +++ b/sophia/src/quad/streaming_mode.rs @@ -93,6 +93,14 @@ where } } } +impl<'a, T> StreamedQuad<'a, T> +where + T: ScopedQuadMode<'a>, +{ + pub fn scoped(quad: T::SourceQuad) -> Self { + T::scoped(quad) + } +} impl<'a, T> Quad for StreamedQuad<'a, T> where T: QuadStreamingMode, @@ -126,7 +134,7 @@ where /// [streaming mode]: triple/streaming_mode/index.html /// [`Quad`]: quad/trait.Quad.html #[macro_export] -macro_rules! 
make_scoped_quad_streming_mode { +macro_rules! make_scoped_quad_streaming_mode { ($mode: ident, $qt: ident) => { #[derive(Debug)] pub struct $mode(std::marker::PhantomData<$qt<'static>>); @@ -134,8 +142,9 @@ macro_rules! make_scoped_quad_streming_mode { type UnsafeQuad = $qt<'static>; } - impl<'a> $crate::quad::streaming_mode::StreamedQuad<'a, $mode> { - pub fn scoped(quad: $qt<'a>) -> Self { + impl<'a> $crate::quad::streaming_mode::ScopedQuadMode<'a> for $mode { + type SourceQuad = $qt<'a>; + fn scoped(quad: $qt<'a>) -> $crate::quad::streaming_mode::StreamedQuad<'a, $mode> { unsafe { $crate::quad::streaming_mode::StreamedQuad::wrap(std::mem::transmute(quad)) } @@ -144,6 +153,16 @@ macro_rules! make_scoped_quad_streming_mode { }; } +/// A utility trait used internally by [`make_scoped_quad_streaming_mode`]. +/// It should not be implemented manually. +/// +/// [`make_scoped_quad_streaming_mode`]: ../../macro.make_scoped_quad_streaming_mode.html +pub trait ScopedQuadMode<'a>: QuadStreamingMode + Sized { + type SourceQuad: Quad + 'a; + /// Convert a quad + fn scoped(quad: Self::SourceQuad) -> StreamedQuad<'a, Self>; +} + // adapter pub(crate) use crate::triple::streaming_mode::FromTriple; diff --git a/sophia/src/triple/streaming_mode.rs b/sophia/src/triple/streaming_mode.rs index 68f86ee0..8e8ca630 100644 --- a/sophia/src/triple/streaming_mode.rs +++ b/sophia/src/triple/streaming_mode.rs @@ -65,7 +65,7 @@ //! [`Triple`]: ../trait.Triple.html //! [`triples`]: ../../graph/trait.Graph.html#tymethod.triples //! [`TripleStreamingMode`]: trait.TripleStreamingMode.html -//! [`make_scoped_triple_streaming_mode`]: ../../macro.make_scoped_triple_streming_mode.html +//! 
[`make_scoped_triple_streaming_mode`]: ../../macro.make_scoped_triple_streaming_mode.html use std::marker::PhantomData; use std::ptr::NonNull; @@ -156,6 +156,14 @@ where } } } +impl<'a, T> StreamedTriple<'a, T> +where + T: ScopedTripleMode<'a>, +{ + pub fn scoped(triple: T::SourceTriple) -> Self { + T::scoped(triple) + } +} impl<'a, T> Triple for StreamedTriple<'a, T> where T: TripleStreamingMode, @@ -186,7 +194,7 @@ where /// [streaming mode]: triple/streaming_mode/index.html /// [`Triple`]: triple/trait.Triple.html #[macro_export] -macro_rules! make_scoped_triple_streming_mode { +macro_rules! make_scoped_triple_streaming_mode { ($mode: ident, $tt: ident) => { #[derive(Debug)] pub struct $mode(std::marker::PhantomData<$tt<'static>>); @@ -194,8 +202,11 @@ macro_rules! make_scoped_triple_streming_mode { type UnsafeTriple = $tt<'static>; } - impl<'a> $crate::triple::streaming_mode::StreamedTriple<'a, $mode> { - pub fn scoped(triple: $tt<'a>) -> Self { + impl<'a> $crate::triple::streaming_mode::ScopedTripleMode<'a> for $mode { + type SourceTriple = $tt<'a>; + fn scoped( + triple: $tt<'a>, + ) -> $crate::triple::streaming_mode::StreamedTriple<'a, $mode> { unsafe { $crate::triple::streaming_mode::StreamedTriple::wrap(std::mem::transmute( triple, @@ -206,6 +217,16 @@ macro_rules! make_scoped_triple_streming_mode { }; } +/// A utility trait used internally by [`make_scoped_triple_streaming_mode`]. +/// It should not be implemented manually. 
+/// +/// [`make_scoped_triple_streaming_mode`]: ../../macro.make_scoped_triple_streaming_mode.html +pub trait ScopedTripleMode<'a>: TripleStreamingMode + Sized { + type SourceTriple: Triple + 'a; + /// Convert a triple + fn scoped(triple: Self::SourceTriple) -> StreamedTriple<'a, Self>; +} + // adapter pub(crate) use crate::quad::streaming_mode::FromQuad; From 4ff902e5e6a52504f530df9280de66ad32b187cc Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Wed, 17 Jun 2020 18:01:38 +0200 Subject: [PATCH 05/11] remove sophia_term dependency in graph/dataset tests This required the addition of a simple impl of TTerm in api/term/test.rs. For the moment, this module is publicly exposed; once graph and dataset move into sophia_api, it will be hidden behind feature 'test_macro'. --- api/src/term.rs | 3 + api/src/term/test.rs | 183 ++++++++++++++++++ sophia/src/dataset/_ext_impl.rs | 8 +- sophia/src/dataset/_traits.rs | 9 +- sophia/src/dataset/adapter/_dataset_graph.rs | 3 +- .../src/dataset/adapter/_graph_as_dataset.rs | 4 +- sophia/src/dataset/test.rs | 121 +++++------- sophia/src/graph/_ext_impl.rs | 8 +- sophia/src/graph/_traits.rs | 10 +- sophia/src/graph/test.rs | 160 +++++++-------- 10 files changed, 329 insertions(+), 180 deletions(-) create mode 100644 api/src/term/test.rs diff --git a/api/src/term.rs b/api/src/term.rs index 19dc1339..44b2d1ad 100644 --- a/api/src/term.rs +++ b/api/src/term.rs @@ -443,3 +443,6 @@ where term_format(self.0, fmt) } } + +//#[cfg(test)] +pub mod test; diff --git a/api/src/term/test.rs b/api/src/term/test.rs new file mode 100644 index 00000000..6e6da98f --- /dev/null +++ b/api/src/term/test.rs @@ -0,0 +1,183 @@ +//! Provide a naive implementation of TTerm for test purposes. +use super::*; +use crate::ns::rdf; +use std::fmt; +use std::hash; + +/// A naive implementation of TTerm, with no check whatsoever. 
+#[derive(Clone, Copy)] +pub struct TestTerm { + kind: TermKind, + value: T, + extra1: Option, + extra2: Option, +} + +impl<'a, T> TestTerm +where + T: From<&'a str>, +{ + pub fn iri(value: &'a str) -> Self { + TestTerm { + kind: TermKind::Iri, + value: value.into(), + extra1: None, + extra2: None, + } + } + pub fn iri2(ns: &'a str, suffix: &'a str) -> Self { + TestTerm { + kind: TermKind::Iri, + value: ns.into(), + extra1: Some(suffix.into()), + extra2: None, + } + } + pub fn bnode(value: &'a str) -> Self { + TestTerm { + kind: TermKind::BlankNode, + value: value.into(), + extra1: None, + extra2: None, + } + } + pub fn var(value: &'a str) -> Self { + TestTerm { + kind: TermKind::Variable, + value: value.into(), + extra1: None, + extra2: None, + } + } + pub fn lit_dt(value: &'a str, datatype: SimpleIri<'a>) -> Self { + let (extra1, extra2) = datatype.destruct(); + TestTerm { + kind: TermKind::Literal, + value: value.into(), + extra1: Some(extra1.into()), + extra2: extra2.map(From::from), + } + } + pub fn lit_lang(value: &'a str, tag: &'a str) -> Self { + TestTerm { + kind: TermKind::Literal, + value: value.into(), + extra1: None, + extra2: Some(tag.into()), + } + } +} + +impl TTerm for TestTerm +where + T: AsRef, +{ + fn kind(&self) -> TermKind { + self.kind + } + fn value_raw(&self) -> (&str, Option<&str>) { + match self.kind { + TermKind::Iri => ( + &self.value.as_ref(), + self.extra1.as_ref().map(|s| s.as_ref()), + ), + _ => (&self.value.as_ref(), None), + } + } + fn datatype(&self) -> Option { + if self.kind == TermKind::Literal { + Some(match self.extra1.as_ref() { + None => rdf::langString, + Some(ns) => { + SimpleIri::new_unchecked(ns.as_ref(), self.extra2.as_ref().map(|s| s.as_ref())) + } + }) + } else { + None + } + } + fn language(&self) -> Option<&str> { + if self.kind == TermKind::Literal && self.extra1.is_none() { + self.extra2.as_ref().map(|s| s.as_ref()) + } else { + None + } + } + fn as_dyn(&self) -> &dyn TTerm { + self + } +} + +impl fmt::Display 
for TestTerm +where + T: AsRef, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + term_format(self, f) + } +} + +impl PartialEq for TestTerm +where + T: TTerm + ?Sized, + U: AsRef, +{ + fn eq(&self, other: &T) -> bool { + term_eq(self, other) + } +} + +impl Eq for TestTerm where T: AsRef + Sized {} + +impl PartialOrd for TestTerm +where + T: TTerm + ?Sized, + U: AsRef, +{ + fn partial_cmp(&self, other: &T) -> Option { + Some(term_cmp(self, other)) + } +} + +impl hash::Hash for TestTerm +where + T: AsRef, +{ + fn hash(&self, state: &mut H) { + term_hash(self, state) + } +} + +impl From> for TestTerm<&'static str> { + fn from(other: SimpleIri<'static>) -> Self { + let (ns, suffix) = other.destruct(); + match suffix { + None => TestTerm::iri(ns), + Some(sf) => TestTerm::iri2(ns, sf), + } + } +} + +impl CopyTerm for TestTerm +where + T: for<'x> From<&'x str>, +{ + fn copy(term: &U) -> Self + where + U: TTerm + ?Sized, + { + let raw = term.value_raw(); + match term.kind() { + TermKind::Iri => match raw.1 { + None => TestTerm::iri(raw.0), + Some(sf) => TestTerm::iri2(raw.0, sf), + }, + TermKind::BlankNode => TestTerm::bnode(raw.0), + TermKind::Variable => TestTerm::var(raw.0), + TermKind::Literal => match term.language() { + None => TestTerm::lit_dt(raw.0, term.datatype().unwrap()), + Some(tag) => TestTerm::lit_lang(raw.0, tag), + }, + } + } +} diff --git a/sophia/src/dataset/_ext_impl.rs b/sophia/src/dataset/_ext_impl.rs index 1bcb20e8..8bfd1aee 100644 --- a/sophia/src/dataset/_ext_impl.rs +++ b/sophia/src/dataset/_ext_impl.rs @@ -194,8 +194,12 @@ mod test { use super::*; use crate::quad::TupleQuad; use sophia_api::ns::*; + use sophia_api::term::test::TestTerm; use sophia_api::term::SimpleIri; + #[allow(dead_code)] + type BoxTerm = TestTerm>; + static D: [TupleQuad; 3] = [ ([rdf::type_, rdf::type_, rdf::Property], None), ([rdf::Property, rdf::type_, rdfs::Class], None), @@ -214,7 +218,7 @@ mod test { } #[cfg(feature = "all_tests")] - type VecAsDataset = 
Vec<([sophia_term::BoxTerm; 3], Option)>; + type VecAsDataset = Vec<([BoxTerm; 3], Option)>; #[cfg(feature = "all_tests")] test_dataset_impl!(vec, VecAsDataset, false); @@ -229,7 +233,7 @@ mod test { } #[cfg(feature = "all_tests")] - type HashSetAsDataset = HashSet<([sophia_term::BoxTerm; 3], Option)>; + type HashSetAsDataset = HashSet<([BoxTerm; 3], Option)>; #[cfg(feature = "all_tests")] test_dataset_impl!(hashset, HashSetAsDataset); diff --git a/sophia/src/dataset/_traits.rs b/sophia/src/dataset/_traits.rs index 8a13d546..dad64b9f 100644 --- a/sophia/src/dataset/_traits.rs +++ b/sophia/src/dataset/_traits.rs @@ -15,7 +15,6 @@ use crate::quad::*; use crate::triple::stream::StreamResult; use sophia_api::term::matcher::*; use sophia_api::term::{same_graph_name, term_eq, TTerm, TermKind}; -use sophia_term::*; use crate::graph::insert_if_absent; @@ -60,9 +59,9 @@ pub trait Dataset { /// so it can be used in a `for` loop: /// ``` /// # use sophia::dataset::Dataset; - /// # use sophia::term::BoxTerm; + /// # use sophia_api::term::simple_iri::SimpleIri; /// # fn foo() -> Result<(), std::convert::Infallible> { - /// # let dataset = Vec::<[BoxTerm;4]>::new(); + /// # let dataset = Vec::<[SimpleIri;4]>::new(); /// for q in dataset.quads() { /// let q = q?; // rethrow error if any /// // do something with q @@ -76,10 +75,10 @@ pub trait Dataset { /// for example: /// ``` /// # use sophia::dataset::Dataset; - /// # use sophia::term::BoxTerm; + /// # use sophia_api::term::simple_iri::SimpleIri; /// # use sophia::quad::stream::QuadSource; /// # fn foo() -> Result<(), std::convert::Infallible> { - /// # let dataset = Vec::<[BoxTerm;4]>::new(); + /// # let dataset = Vec::<[SimpleIri;4]>::new(); /// dataset.quads().for_each_quad(|q| { /// // do something with q /// })?; // rethrow error if any diff --git a/sophia/src/dataset/adapter/_dataset_graph.rs b/sophia/src/dataset/adapter/_dataset_graph.rs index 90d5a503..8d12a0a7 100644 --- a/sophia/src/dataset/adapter/_dataset_graph.rs 
+++ b/sophia/src/dataset/adapter/_dataset_graph.rs @@ -187,9 +187,10 @@ mod test { use crate::quad::stream::QuadSource; use crate::triple::stream::TripleSource; use sophia_api::term::{same_graph_name, SimpleIri, TTerm}; - use sophia_term::BoxTerm; use std::collections::HashSet; + type BoxTerm = sophia_api::term::test::TestTerm>; + type MyQuad = ([BoxTerm; 3], Option); type MyDataset = HashSet; type MyDatasetGraph = DatasetGraph>>; diff --git a/sophia/src/dataset/adapter/_graph_as_dataset.rs b/sophia/src/dataset/adapter/_graph_as_dataset.rs index f26a29ae..f4aba5b5 100644 --- a/sophia/src/dataset/adapter/_graph_as_dataset.rs +++ b/sophia/src/dataset/adapter/_graph_as_dataset.rs @@ -390,11 +390,13 @@ mod test { use crate::graph::*; use crate::triple::stream::TripleSource; use sophia_api::ns::{rdf, rdfs}; - use sophia_term::{BoxTerm, StaticTerm}; use std::collections::HashSet; use std::convert::Infallible; use std::error::Error; + type BoxTerm = sophia_api::term::test::TestTerm>; + type StaticTerm = sophia_api::term::test::TestTerm<&'static str>; + const DG: Option<&'static StaticTerm> = None; type MyGraph = HashSet<[BoxTerm; 3]>; diff --git a/sophia/src/dataset/test.rs b/sophia/src/dataset/test.rs index f480e272..24352ab7 100644 --- a/sophia/src/dataset/test.rs +++ b/sophia/src/dataset/test.rs @@ -10,13 +10,15 @@ use crate::quad::*; use lazy_static::lazy_static; pub use sophia_api; // required when test macro is used in other packages use sophia_api::ns::*; +use sophia_api::term::test::TestTerm; use sophia_api::term::{CopiableTerm, CopyTerm}; -pub use sophia_term; // required when test macro is used in other packages -use sophia_term::*; + +type StaticTerm = TestTerm<&'static str>; +type BoxTerm = TestTerm>; lazy_static! 
{ - pub static ref G1: StaticTerm = StaticTerm::new_iri_suffixed(NS, "G1").unwrap(); - pub static ref G2: StaticTerm = StaticTerm::new_iri_suffixed(NS, "G2").unwrap(); + pub static ref G1: StaticTerm = StaticTerm::iri2(NS, "G1"); + pub static ref G2: StaticTerm = StaticTerm::iri2(NS, "G2"); // pub static ref DG: Option = None; pub static ref GN1: Option = Some(*G1); @@ -29,7 +31,7 @@ pub fn no_quad() -> impl QuadSource { } pub fn some_quads() -> impl QuadSource { - let v = vec![ + let v: Vec<([StaticTerm; 3], Option)> = vec![ ([*C1, rdf::type_.into(), rdfs::Class.into()], *DG), ([*C1, rdf::type_.into(), rdfs::Class.into()], *GN1), ([*C2, rdf::type_.into(), rdfs::Class.into()], *DG), @@ -52,12 +54,11 @@ pub fn some_quads() -> impl QuadSource { ([*I1B, *P1, *I2B], *GN2), ([*I2A, *P2, *I2B], *GN2), ]; - v.into_iter().as_quad_source() } pub fn strict_node_types_quads() -> impl QuadSource { - vec![ + let v: Vec<([StaticTerm; 3], Option)> = vec![ ( [rdf::type_.into(), rdf::type_.into(), rdf::Property.into()], Some(rdf::type_.into()), @@ -66,13 +67,12 @@ pub fn strict_node_types_quads() -> impl QuadSource { ([*B2, rdf::type_.into(), *B1], None), ([*B2, rdf::type_.into(), *L2], None), ([*B2, rdf::type_.into(), *L2E], None), - ] - .into_iter() - .as_quad_source() + ]; + v.into_iter().as_quad_source() } pub fn generalized_node_types_quads() -> impl QuadSource { - vec![ + let v: Vec<([StaticTerm; 3], Option)> = vec![ ( [rdf::type_.into(), rdf::type_.into(), rdf::Property.into()], Some(rdf::type_.into()), @@ -81,9 +81,8 @@ pub fn generalized_node_types_quads() -> impl QuadSource { ([*L2, *L1, *L1], Some(*L2)), ([*V1, *V2, *V3], Some(*V3)), ([*B2, *V1, *L2E], None), - ] - .into_iter() - .as_quad_source() + ]; + v.into_iter().as_quad_source() } pub fn as_box_q(quad: Result) -> ([BoxTerm; 3], Option) @@ -93,7 +92,7 @@ where let quad = quad.unwrap(); ( [quad.s().copied(), quad.p().copied(), quad.o().copied()], - quad.g().map(BoxTerm::copy), + quad.g().map(CopyTerm::copy), ) } @@ 
-330,8 +329,6 @@ macro_rules! test_dataset_impl { use self::sophia_api::ns::*; use self::sophia_api::term::TTerm; use self::sophia_api::term::matcher::ANY; - use self::sophia_term::*; - use self::sophia_term::literal::convert::AsLiteral; #[allow(unused_imports)] use super::*; @@ -721,17 +718,14 @@ macro_rules! test_dataset_impl { let subjects = d.subjects().unwrap(); assert_eq!(subjects.len(), 8); - - let rsubjects: std::collections::HashSet<_> = - subjects.iter().map(|t| t.as_ref_str()).collect(); - assert!(rsubjects.contains(&C1)); - assert!(rsubjects.contains(&C2)); - assert!(rsubjects.contains(&P1)); - assert!(rsubjects.contains(&P2)); - assert!(rsubjects.contains(&I1A)); - assert!(rsubjects.contains(&I1B)); - assert!(rsubjects.contains(&I2A)); - assert!(rsubjects.contains(&I2B)); + assert_contains(&subjects, &*C1); + assert_contains(&subjects, &*C2); + assert_contains(&subjects, &*P1); + assert_contains(&subjects, &*P2); + assert_contains(&subjects, &*I1A); + assert_contains(&subjects, &*I1B); + assert_contains(&subjects, &*I2A); + assert_contains(&subjects, &*I2B); Ok(()) } @@ -741,15 +735,12 @@ macro_rules! test_dataset_impl { let predicates = d.predicates().unwrap(); assert_eq!(predicates.len(), 6); - - let rpredicates: std::collections::HashSet<_> = - predicates.iter().map(|t| t.as_ref_str()).collect(); - assert!(rpredicates.contains(&rdf::type_.into())); - assert!(rpredicates.contains(&rdfs::subClassOf.into())); - assert!(rpredicates.contains(&rdfs::domain.into())); - assert!(rpredicates.contains(&rdfs::range.into())); - assert!(rpredicates.contains(&P1)); - assert!(rpredicates.contains(&P2)); + assert_contains(&predicates, &rdf::type_); + assert_contains(&predicates, &rdfs::subClassOf); + assert_contains(&predicates, &rdfs::domain); + assert_contains(&predicates, &rdfs::range); + assert_contains(&predicates, &*P1); + assert_contains(&predicates, &*P2); Ok(()) } @@ -759,16 +750,13 @@ macro_rules! 
test_dataset_impl { let objects = d.objects().unwrap(); assert_eq!(objects.len(), 7); - - let robjects: std::collections::HashSet<_> = - objects.iter().map(|t| t.as_ref_str()).collect(); - assert!(robjects.contains(&rdf::Property.into())); - assert!(robjects.contains(&rdfs::Class.into())); - assert!(robjects.contains(&rdfs::Resource.into())); - assert!(robjects.contains(&C1)); - assert!(robjects.contains(&C2)); - assert!(robjects.contains(&I2A)); - assert!(robjects.contains(&I2B)); + assert_contains(&objects, &rdf::Property); + assert_contains(&objects, &rdfs::Class); + assert_contains(&objects, &rdfs::Resource); + assert_contains(&objects, &*C1); + assert_contains(&objects, &*C2); + assert_contains(&objects, &*I2A); + assert_contains(&objects, &*I2B); Ok(()) } @@ -778,11 +766,8 @@ macro_rules! test_dataset_impl { let graph_names = d.graph_names().unwrap(); assert_eq!(graph_names.len(), 2); - - let rgraph_names: std::collections::HashSet = - graph_names.iter().map(|t| t.as_ref_str()).collect(); - assert!(rgraph_names.contains(&G1)); - assert!(rgraph_names.contains(&G2)); + assert_contains(&graph_names, &*G1); + assert_contains(&graph_names, &*G2); Ok(()) } @@ -796,11 +781,8 @@ macro_rules! test_dataset_impl { let iris = d.iris().unwrap(); assert_eq!(iris.len(), 2); - - let riris: std::collections::HashSet<_> = - iris.iter().map(|t| t.as_ref_str()).collect(); - assert!(riris.contains(&rdf::Property.into())); - assert!(riris.contains(&rdf::type_.into())); + assert_contains(&iris, &rdf::Property); + assert_contains(&iris, &rdf::type_); Ok(()) } @@ -814,11 +796,8 @@ macro_rules! test_dataset_impl { let bnodes = d.bnodes().unwrap(); assert_eq!(bnodes.len(), 2); - - let rbnodes: std::collections::HashSet<_> = - bnodes.iter().map(|t| t.value()).collect(); - assert!(rbnodes.contains("1")); - assert!(rbnodes.contains("2")); + assert_contains(&bnodes, &*B1); + assert_contains(&bnodes, &*B2); Ok(()) } @@ -832,12 +811,9 @@ macro_rules! 
test_dataset_impl { let literals = d.literals().unwrap(); assert_eq!(literals.len(), 3); - - let rliterals: std::collections::HashSet<_> = - literals.iter().map(|t| t.as_ref_str()).collect(); - assert!(rliterals.contains(&"lit1".as_literal().into())); - assert!(rliterals.contains(&"lit2".as_literal().into())); - assert!(rliterals.contains(&StaticTerm::new_literal_lang("lit2", "en").unwrap())); + assert_contains(&literals, &*L1); + assert_contains(&literals, &*L2); + assert_contains(&literals, &*L2E); Ok(()) } @@ -848,12 +824,9 @@ macro_rules! test_dataset_impl { let variables = d.variables().unwrap(); assert_eq!(variables.len(), 3); - - let rvariables: std::collections::HashSet<_> = - variables.iter().map(|t| t.value()).collect(); - assert!(rvariables.contains("v1")); - assert!(rvariables.contains("v2")); - assert!(rvariables.contains("v3")); + assert_contains(&variables, &*V1); + assert_contains(&variables, &*V2); + assert_contains(&variables, &*V3); } else { let d: $dataset_impl = $dataset_collector(strict_node_types_quads()).unwrap(); diff --git a/sophia/src/graph/_ext_impl.rs b/sophia/src/graph/_ext_impl.rs index e8555556..9485dd68 100644 --- a/sophia/src/graph/_ext_impl.rs +++ b/sophia/src/graph/_ext_impl.rs @@ -166,8 +166,12 @@ impl<'a, T, S: BuildHasher> SetGraph for HashSet where T: Eq + Hash + Trip mod test { use super::*; use sophia_api::ns::*; + use sophia_api::term::test::TestTerm; use sophia_api::term::SimpleIri; + #[allow(dead_code)] + type BoxTerm = TestTerm>; + static G: [[SimpleIri; 3]; 3] = [ [rdf::type_, rdf::type_, rdf::Property], [rdf::Property, rdf::type_, rdfs::Class], @@ -183,7 +187,7 @@ mod test { } #[cfg(feature = "all_tests")] - type VecAsGraph = Vec<[sophia_term::BoxTerm; 3]>; + type VecAsGraph = Vec<[BoxTerm; 3]>; #[cfg(feature = "all_tests")] test_graph_impl!(vec, VecAsGraph, false); @@ -198,7 +202,7 @@ mod test { } #[cfg(feature = "all_tests")] - type HashSetAsGraph = HashSet<[sophia_term::BoxTerm; 3]>; + type HashSetAsGraph = 
HashSet<[BoxTerm; 3]>; #[cfg(feature = "all_tests")] test_graph_impl!(hashset, HashSetAsGraph); diff --git a/sophia/src/graph/_traits.rs b/sophia/src/graph/_traits.rs index 0e8bf0a6..dfaeb731 100644 --- a/sophia/src/graph/_traits.rs +++ b/sophia/src/graph/_traits.rs @@ -12,7 +12,6 @@ use crate::triple::streaming_mode::*; use crate::triple::*; use sophia_api::term::matcher::TermMatcher; use sophia_api::term::{term_eq, TTerm, TermKind}; -use sophia_term::*; use std::convert::Infallible; use std::error::Error; @@ -60,9 +59,9 @@ pub trait Graph { /// so it can be used in a `for` loop: /// ``` /// # use sophia::graph::Graph; - /// # use sophia::term::BoxTerm; + /// # use sophia_api::term::simple_iri::SimpleIri; /// # fn foo() -> Result<(), std::convert::Infallible> { - /// # let graph = Vec::<[BoxTerm;3]>::new(); + /// # let graph = Vec::<[SimpleIri;3]>::new(); /// for t in graph.triples() { /// let t = t?; // rethrow error if any /// // do something with t @@ -76,10 +75,10 @@ pub trait Graph { /// for example: /// ``` /// # use sophia::graph::Graph; - /// # use sophia::term::BoxTerm; + /// # use sophia_api::term::simple_iri::SimpleIri; /// # use sophia::triple::stream::TripleSource; /// # fn foo() -> Result<(), std::convert::Infallible> { - /// # let graph = Vec::<[BoxTerm;3]>::new(); + /// # let graph = Vec::<[SimpleIri;3]>::new(); /// graph.triples().for_each_triple(|t| { /// // do something with t /// })?; // rethrow error if any @@ -430,7 +429,6 @@ pub trait MutableGraph: Graph { /// # Usage /// ``` /// # use sophia_api::ns::{Namespace, rdf, rdfs, xsd}; - /// # use sophia_term::BoxTerm; /// # use sophia::graph::{MutableGraph, MGResult}; /// # use std::collections::HashSet; /// diff --git a/sophia/src/graph/test.rs b/sophia/src/graph/test.rs index 626a1adb..5bd1707d 100644 --- a/sophia/src/graph/test.rs +++ b/sophia/src/graph/test.rs @@ -7,33 +7,33 @@ use crate::triple::stream::*; use crate::triple::streaming_mode::{TripleStreamingMode, UnsafeTriple}; use 
crate::triple::*; use lazy_static::lazy_static; -pub use sophia_api; // required when test macro is used in other packages +pub use sophia_api; use sophia_api::ns::*; +use sophia_api::term::test::TestTerm; use sophia_api::term::CopiableTerm; -pub use sophia_term; // required when test macro is used in other packages -use sophia_term::literal::convert::AsLiteral; -use sophia_term::*; -pub const NS: &str = "http://example.org/"; +type StaticTerm = TestTerm<&'static str>; +type BoxTerm = TestTerm>; +pub const NS: &str = "http://example.org/"; lazy_static! { - pub static ref C1: StaticTerm = StaticTerm::new_iri_suffixed(NS, "C1").unwrap(); - pub static ref C2: StaticTerm = StaticTerm::new_iri_suffixed(NS, "C2").unwrap(); - pub static ref P1: StaticTerm = StaticTerm::new_iri_suffixed(NS, "p1").unwrap(); - pub static ref P2: StaticTerm = StaticTerm::new_iri_suffixed(NS, "p2").unwrap(); - pub static ref I1A: StaticTerm = StaticTerm::new_iri_suffixed(NS, "I1A").unwrap(); - pub static ref I1B: StaticTerm = StaticTerm::new_iri_suffixed(NS, "I1B").unwrap(); - pub static ref I2A: StaticTerm = StaticTerm::new_iri_suffixed(NS, "I2A").unwrap(); - pub static ref I2B: StaticTerm = StaticTerm::new_iri_suffixed(NS, "I2B").unwrap(); + pub static ref C1: StaticTerm = StaticTerm::iri2(NS, "C1"); + pub static ref C2: StaticTerm = StaticTerm::iri2(NS, "C2"); + pub static ref P1: StaticTerm = StaticTerm::iri2(NS, "p1"); + pub static ref P2: StaticTerm = StaticTerm::iri2(NS, "p2"); + pub static ref I1A: StaticTerm = StaticTerm::iri2(NS, "I1A"); + pub static ref I1B: StaticTerm = StaticTerm::iri2(NS, "I1B"); + pub static ref I2A: StaticTerm = StaticTerm::iri2(NS, "I2A"); + pub static ref I2B: StaticTerm = StaticTerm::iri2(NS, "I2B"); // - pub static ref B1: StaticTerm = StaticTerm::new_bnode("1").unwrap(); - pub static ref B2: StaticTerm = StaticTerm::new_bnode("2").unwrap(); - pub static ref L1: StaticTerm = "lit1".as_literal().into(); - pub static ref L2: StaticTerm = 
"lit2".as_literal().into(); - pub static ref L2E: StaticTerm = StaticTerm::new_literal_lang("lit2", "en").unwrap(); - pub static ref V1: StaticTerm = StaticTerm::new_variable("v1").unwrap(); - pub static ref V2: StaticTerm = StaticTerm::new_variable("v2").unwrap(); - pub static ref V3: StaticTerm = StaticTerm::new_variable("v3").unwrap(); + pub static ref B1: StaticTerm = StaticTerm::bnode("1"); + pub static ref B2: StaticTerm = StaticTerm::bnode("2"); + pub static ref L1: StaticTerm = StaticTerm::lit_dt("lit1", xsd::string); + pub static ref L2: StaticTerm = StaticTerm::lit_dt("lit2", xsd::string); + pub static ref L2E: StaticTerm = StaticTerm::lit_lang("lit2", "en"); + pub static ref V1: StaticTerm = StaticTerm::var("v1"); + pub static ref V2: StaticTerm = StaticTerm::var("v2"); + pub static ref V3: StaticTerm = StaticTerm::var("v3"); } pub fn no_triple() -> impl TripleSource { @@ -42,7 +42,7 @@ pub fn no_triple() -> impl TripleSource { } pub fn some_triples() -> impl TripleSource { - vec![ + let v: Vec<[StaticTerm; 3]> = vec![ [*C1, rdf::type_.into(), rdfs::Class.into()], [*C2, rdf::type_.into(), rdfs::Class.into()], [*C2, rdf::type_.into(), rdfs::Resource.into()], @@ -64,33 +64,30 @@ pub fn some_triples() -> impl TripleSource { [*I1A, *P1, *I2A], [*I1B, *P1, *I2B], [*I2A, *P2, *I2B], - ] - .into_iter() - .as_triple_source() + ]; + v.into_iter().as_triple_source() } pub fn strict_node_types_triples() -> impl TripleSource { - vec![ + let v: Vec<[StaticTerm; 3]> = vec![ [rdf::type_.into(), rdf::type_.into(), rdf::Property.into()], [*B1, rdf::type_.into(), *L1], [*B2, rdf::type_.into(), *B1], [*B2, rdf::type_.into(), *L2], [*B2, rdf::type_.into(), *L2E], - ] - .into_iter() - .as_triple_source() + ]; + v.into_iter().as_triple_source() } pub fn generalized_node_types_triples() -> impl TripleSource { - vec![ + let v: Vec<[StaticTerm; 3]> = vec![ [rdf::type_.into(), rdf::type_.into(), rdf::Property.into()], [*B1, *B2, *B1], [*L2, *L1, *L1], [*V1, *V2, *V3], [*B2, *V1, 
*L2E], - ] - .into_iter() - .as_triple_source() + ]; + v.into_iter().as_triple_source() } pub fn as_box_t(triple: Result) -> [BoxTerm; 3] @@ -128,6 +125,15 @@ pub fn assert_consistent_hint(val: usize, hint: (usize, Option)) { ) } +pub fn assert_contains<'a, I, T, U>(collection: I, item: &U) +where + I: IntoIterator, + T: 'a, + U: PartialEq, +{ + assert!(collection.into_iter().any(|i| item == i)) +} + /// Generates a test suite for implementations of /// [`Graph`], [`CollectibleGraph`] and [`MutableGraph`]. /// @@ -286,11 +292,8 @@ macro_rules! test_graph_impl { mod $module_name { use $crate::graph::test::*; use $crate::graph::*; - use self::sophia_api::ns::*; - use self::sophia_api::term::TTerm; + use $crate::ns::*; use self::sophia_api::term::matcher::ANY; - use self::sophia_term::StaticTerm; - use self::sophia_term::literal::convert::AsLiteral; #[allow(unused_imports)] use super::*; @@ -476,17 +479,14 @@ macro_rules! test_graph_impl { let subjects = g.subjects().unwrap(); assert_eq!(subjects.len(), 8); - - let rsubjects: std::collections::HashSet<_> = - subjects.iter().map(|t| t.as_ref_str()).collect(); - assert!(rsubjects.contains(&C1)); - assert!(rsubjects.contains(&C2)); - assert!(rsubjects.contains(&P1)); - assert!(rsubjects.contains(&P2)); - assert!(rsubjects.contains(&I1A)); - assert!(rsubjects.contains(&I1B)); - assert!(rsubjects.contains(&I2A)); - assert!(rsubjects.contains(&I2B)); + assert_contains(&subjects, &*C1); + assert_contains(&subjects, &*C2); + assert_contains(&subjects, &*P1); + assert_contains(&subjects, &*P2); + assert_contains(&subjects, &*I1A); + assert_contains(&subjects, &*I1B); + assert_contains(&subjects, &*I2A); + assert_contains(&subjects, &*I2B); Ok(()) } @@ -496,15 +496,12 @@ macro_rules! 
test_graph_impl { let predicates = g.predicates().unwrap(); assert_eq!(predicates.len(), 6); - - let rpredicates: std::collections::HashSet<_> = - predicates.iter().map(|t| t.as_ref_str()).collect(); - assert!(rpredicates.contains(&rdf::type_.into())); - assert!(rpredicates.contains(&rdfs::subClassOf.into())); - assert!(rpredicates.contains(&rdfs::domain.into())); - assert!(rpredicates.contains(&rdfs::range.into())); - assert!(rpredicates.contains(&P1)); - assert!(rpredicates.contains(&P2)); + assert_contains(&predicates, &rdf::type_); + assert_contains(&predicates, &rdfs::subClassOf); + assert_contains(&predicates, &rdfs::domain); + assert_contains(&predicates, &rdfs::range); + assert_contains(&predicates, &*P1); + assert_contains(&predicates, &*P2); Ok(()) } @@ -514,16 +511,13 @@ macro_rules! test_graph_impl { let objects = g.objects().unwrap(); assert_eq!(objects.len(), 7); - - let robjects: std::collections::HashSet<_> = - objects.iter().map(|t| t.as_ref_str()).collect(); - assert!(robjects.contains(&rdf::Property.into())); - assert!(robjects.contains(&rdfs::Class.into())); - assert!(robjects.contains(&rdfs::Resource.into())); - assert!(robjects.contains(&C1)); - assert!(robjects.contains(&C2)); - assert!(robjects.contains(&I2A)); - assert!(robjects.contains(&I2B)); + assert_contains(&objects, &rdf::Property); + assert_contains(&objects, &rdfs::Class); + assert_contains(&objects, &rdfs::Resource); + assert_contains(&objects, &*C1); + assert_contains(&objects, &*C2); + assert_contains(&objects, &*I2A); + assert_contains(&objects, &*I2B); Ok(()) } @@ -537,11 +531,8 @@ macro_rules! 
test_graph_impl { let iris = g.iris().unwrap(); assert_eq!(iris.len(), 2); - - let riris: std::collections::HashSet<_> = - iris.iter().map(|t| t.as_ref_str()).collect(); - assert!(riris.contains(&rdf::Property.into())); - assert!(riris.contains(&rdf::type_.into())); + assert_contains(&iris, &rdf::Property); + assert_contains(&iris, &rdf::type_); Ok(()) } @@ -555,11 +546,8 @@ macro_rules! test_graph_impl { let bnodes = g.bnodes().unwrap(); assert_eq!(bnodes.len(), 2); - - let rbnodes: std::collections::HashSet<_> = - bnodes.iter().map(|t| t.value()).collect(); - assert!(rbnodes.contains("1")); - assert!(rbnodes.contains("2")); + assert_contains(&bnodes, &*B1); + assert_contains(&bnodes, &*B2); Ok(()) } @@ -573,12 +561,9 @@ macro_rules! test_graph_impl { let literals = g.literals().unwrap(); assert_eq!(literals.len(), 3); - - let rliterals: std::collections::HashSet<_> = - literals.iter().map(|t| t.as_ref_str()).collect(); - assert!(rliterals.contains(&"lit1".as_literal().into())); - assert!(rliterals.contains(&"lit2".as_literal().into())); - assert!(rliterals.contains(&StaticTerm::new_literal_lang("lit2", "en").unwrap())); + assert_contains(&literals, &*L1); + assert_contains(&literals, &*L2); + assert_contains(&literals, &*L2E); Ok(()) } @@ -589,12 +574,9 @@ macro_rules! 
test_graph_impl { let variables = g.variables().unwrap(); assert_eq!(variables.len(), 3); - - let rvariables: std::collections::HashSet<_> = - variables.iter().map(|t| t.value()).collect(); - assert!(rvariables.contains("v1")); - assert!(rvariables.contains("v2")); - assert!(rvariables.contains("v3")); + assert_contains(&variables, &*V1); + assert_contains(&variables, &*V2); + assert_contains(&variables, &*V3); } else { let g: $graph_impl = $graph_collector(strict_node_types_triples()).unwrap(); From ebcb60fb50608db6da4fbd38160e6213d52d687a Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Wed, 17 Jun 2020 19:17:56 +0200 Subject: [PATCH 06/11] remove sophia_term dependencies in triple/quad-stream tests --- api/src/term/test.rs | 2 +- sophia/src/quad/stream/test.rs | 30 +++++++++---------- sophia/src/triple/stream/test.rs | 50 +++++++++++++++++--------------- 3 files changed, 43 insertions(+), 39 deletions(-) diff --git a/api/src/term/test.rs b/api/src/term/test.rs index 6e6da98f..7e9efecb 100644 --- a/api/src/term/test.rs +++ b/api/src/term/test.rs @@ -5,7 +5,7 @@ use std::fmt; use std::hash; /// A naive implementation of TTerm, with no check whatsoever. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct TestTerm { kind: TermKind, value: T, diff --git a/sophia/src/quad/stream/test.rs b/sophia/src/quad/stream/test.rs index 4e120b30..1accd798 100644 --- a/sophia/src/quad/stream/test.rs +++ b/sophia/src/quad/stream/test.rs @@ -4,20 +4,22 @@ use crate::quad::Quad; use crate::triple::stream::TripleSource; use lazy_static::lazy_static; use sophia_api::ns::{rdf, xsd}; -use sophia_term::{BoxTerm, StaticTerm}; +use sophia_api::term::test::TestTerm; +use sophia_api::term::CopiableTerm; + +type BoxTerm = TestTerm>; +type StaticTerm = TestTerm<&'static str>; pub const NS: &'static str = "http://example.org/"; lazy_static! 
{ - pub static ref ALICE: StaticTerm = StaticTerm::new_iri_suffixed(NS, "alice").unwrap(); - pub static ref BOB: StaticTerm = StaticTerm::new_iri_suffixed(NS, "bob").unwrap(); - pub static ref CHARLIE: StaticTerm = StaticTerm::new_iri_suffixed(NS, "charlie").unwrap(); - pub static ref KNOWS: StaticTerm = StaticTerm::new_iri_suffixed(NS, "knows").unwrap(); - pub static ref NAME: StaticTerm = StaticTerm::new_iri_suffixed(NS, "name").unwrap(); - pub static ref PERSON: StaticTerm = StaticTerm::new_iri_suffixed(NS, "Person").unwrap(); - pub static ref ALICE_LIT: StaticTerm = - StaticTerm::new_literal_dt("Alice", xsd::string.clone()).unwrap(); - pub static ref BOB_LIT: StaticTerm = - StaticTerm::new_literal_dt("Bob", xsd::string.clone()).unwrap(); + pub static ref ALICE: StaticTerm = StaticTerm::iri2(NS, "alice"); + pub static ref BOB: StaticTerm = StaticTerm::iri2(NS, "bob"); + pub static ref CHARLIE: StaticTerm = StaticTerm::iri2(NS, "charlie"); + pub static ref KNOWS: StaticTerm = StaticTerm::iri2(NS, "knows"); + pub static ref NAME: StaticTerm = StaticTerm::iri2(NS, "name"); + pub static ref PERSON: StaticTerm = StaticTerm::iri2(NS, "Person"); + pub static ref ALICE_LIT: StaticTerm = StaticTerm::lit_dt("Alice", xsd::string); + pub static ref BOB_LIT: StaticTerm = StaticTerm::lit_dt("Bob", xsd::string); } fn make_dataset() -> Vec<[StaticTerm; 4]> { @@ -153,7 +155,7 @@ fn filter_map_quads_to_triples() { d.quads() .filter_map_quads(|q| -> Option<[BoxTerm; 3]> { if q.s() == &BOB as &StaticTerm { - Some([q.s().clone_into(), q.p().clone_into(), q.o().clone_into()]) + Some([q.s().copied(), q.p().copied(), q.o().copied()]) } else { None } @@ -219,9 +221,7 @@ fn map_quads_to_triple() { let d = make_dataset(); let mut g = Vec::<[BoxTerm; 3]>::new(); d.quads() - .map_quads(|q| -> [BoxTerm; 3] { - [q.s().clone_into(), q.p().clone_into(), q.o().clone_into()] - }) + .map_quads(|q| -> [BoxTerm; 3] { [q.s().copied(), q.p().copied(), q.o().copied()] }) .add_to_graph(&mut g) 
.unwrap(); assert_eq!(d.len(), g.len()); diff --git a/sophia/src/triple/stream/test.rs b/sophia/src/triple/stream/test.rs index 3fbdb4cf..457f5c07 100644 --- a/sophia/src/triple/stream/test.rs +++ b/sophia/src/triple/stream/test.rs @@ -4,26 +4,30 @@ use crate::quad::stream::QuadSource; use crate::triple::Triple; use lazy_static::lazy_static; use sophia_api::ns::{rdf, xsd}; -use sophia_term::BoxTerm; +use sophia_api::term::test::TestTerm; +use sophia_api::term::CopiableTerm; + +type BoxTerm = TestTerm>; +type StaticTerm = TestTerm<&'static str>; pub const NS: &'static str = "http://example.org/"; lazy_static! { - pub static ref ALICE: StaticTerm = StaticTerm::new_iri_suffixed(NS, "alice").unwrap(); - pub static ref BOB: StaticTerm = StaticTerm::new_iri_suffixed(NS, "bob").unwrap(); - pub static ref CHARLIE: StaticTerm = StaticTerm::new_iri_suffixed(NS, "charlie").unwrap(); - pub static ref KNOWS: StaticTerm = StaticTerm::new_iri_suffixed(NS, "knows").unwrap(); - pub static ref NAME: StaticTerm = StaticTerm::new_iri_suffixed(NS, "name").unwrap(); - pub static ref PERSON: StaticTerm = StaticTerm::new_iri_suffixed(NS, "Person").unwrap(); - pub static ref ALICE_LIT: StaticTerm = StaticTerm::new_literal_dt("Alice", xsd::string).unwrap(); - pub static ref BOB_LIT: StaticTerm = StaticTerm::new_literal_dt("Bob", xsd::string).unwrap(); + pub static ref ALICE: StaticTerm = StaticTerm::iri2(NS, "alice"); + pub static ref BOB: StaticTerm = StaticTerm::iri2(NS, "bob"); + pub static ref CHARLIE: StaticTerm = StaticTerm::iri2(NS, "charlie"); + pub static ref KNOWS: StaticTerm = StaticTerm::iri2(NS, "knows"); + pub static ref NAME: StaticTerm = StaticTerm::iri2(NS, "name"); + pub static ref PERSON: StaticTerm = StaticTerm::iri2(NS, "Person"); + pub static ref ALICE_LIT: StaticTerm = StaticTerm::lit_dt("Alice", xsd::string); + pub static ref BOB_LIT: StaticTerm = StaticTerm::lit_dt("Bob", xsd::string); // Relative IRIs - pub static ref ALICE_REF: StaticTerm = 
StaticTerm::new_iri("alice").unwrap(); - pub static ref BOB_REF: StaticTerm = StaticTerm::new_iri("bob").unwrap(); - pub static ref CHARLIE_REF: StaticTerm = StaticTerm::new_iri("charlie").unwrap(); - pub static ref KNOWS_REF: StaticTerm = StaticTerm::new_iri("knows").unwrap(); - pub static ref NAME_REF: StaticTerm = StaticTerm::new_iri("name").unwrap(); - pub static ref PERSON_REF: StaticTerm = StaticTerm::new_iri("Person").unwrap(); + pub static ref ALICE_REF: StaticTerm = StaticTerm::iri("alice"); + pub static ref BOB_REF: StaticTerm = StaticTerm::iri("bob"); + pub static ref CHARLIE_REF: StaticTerm = StaticTerm::iri("charlie"); + pub static ref KNOWS_REF: StaticTerm = StaticTerm::iri("knows"); + pub static ref NAME_REF: StaticTerm = StaticTerm::iri("name"); + pub static ref PERSON_REF: StaticTerm = StaticTerm::iri("Person"); } fn make_graph() -> Vec<[StaticTerm; 3]> { @@ -152,10 +156,10 @@ fn filter_map_triples_to_quads() { .filter_map_triples(|t| -> Option<[BoxTerm; 4]> { if t.s() == &BOB as &StaticTerm { Some([ - t.s().clone_into(), - t.p().clone_into(), - t.o().clone_into(), - t.s().clone_into(), + t.s().copied(), + t.p().copied(), + t.o().copied(), + t.s().copied(), ]) } else { None @@ -219,10 +223,10 @@ fn map_triples_to_quads() { g.triples() .map_triples(|t| -> [BoxTerm; 4] { [ - t.s().clone_into(), - t.p().clone_into(), - t.o().clone_into(), - t.s().clone_into(), + t.s().copied(), + t.p().copied(), + t.o().copied(), + t.s().copied(), ] }) .add_to_dataset(&mut d) From 28b54a7bdae1ff7da2c3e9a4d2362706e7f1f2b6 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Wed, 17 Jun 2020 19:51:05 +0200 Subject: [PATCH 07/11] moved modules 'triple', 'quad', 'graph' & 'dataset' to 'sophia_api' Actually, implementation parts of 'graph' and 'dataset' are left in 'sophia'. The test submodules of graph::_isomorphism and dataset::_isomorphism have been commented out for the moment, as they depend on `sophia`. 
--- api/Cargo.toml | 11 +++++ {sophia => api}/src/dataset.rs | 3 -- {sophia => api}/src/dataset/_ext_impl.rs | 10 ++--- {sophia => api}/src/dataset/_isomorphism.rs | 44 ++++++++++--------- {sophia => api}/src/dataset/_traits.rs | 10 ++--- {sophia => api}/src/dataset/adapter.rs | 0 .../src/dataset/adapter/_dataset_graph.rs | 8 ++-- {sophia => api}/src/dataset/adapter/_error.rs | 0 .../src/dataset/adapter/_graph_as_dataset.rs | 8 ++-- {sophia => api}/src/dataset/test.rs | 13 +++--- {sophia => api}/src/graph.rs | 4 -- {sophia => api}/src/graph/_ext_impl.rs | 10 ++--- {sophia => api}/src/graph/_isomorphism.rs | 14 +++--- {sophia => api}/src/graph/_traits.rs | 35 ++++++++------- {sophia => api}/src/graph/test.rs | 9 ++-- api/src/lib.rs | 8 ++-- {sophia => api}/src/quad.rs | 2 +- {sophia => api}/src/quad/stream.rs | 0 {sophia => api}/src/quad/stream/_filter.rs | 0 .../src/quad/stream/_filter_map.rs | 0 {sophia => api}/src/quad/stream/_iterator.rs | 0 {sophia => api}/src/quad/stream/_map.rs | 0 {sophia => api}/src/quad/stream/test.rs | 6 +-- {sophia => api}/src/quad/streaming_mode.rs | 2 +- .../src/quad/streaming_mode/_unsafe_quad.rs | 2 +- api/src/term.rs | 2 +- {sophia => api}/src/triple.rs | 3 +- {sophia => api}/src/triple/stream.rs | 0 {sophia => api}/src/triple/stream/_error.rs | 0 {sophia => api}/src/triple/stream/_filter.rs | 0 .../src/triple/stream/_filter_map.rs | 0 .../src/triple/stream/_iterator.rs | 0 {sophia => api}/src/triple/stream/_map.rs | 0 {sophia => api}/src/triple/stream/test.rs | 6 +-- {sophia => api}/src/triple/streaming_mode.rs | 2 +- .../triple/streaming_mode/_unsafe_triple.rs | 2 +- sophia/Cargo.toml | 4 +- sophia/src/dataset/inmem.rs | 14 +++--- sophia/src/dataset/inmem/_gspo_wrapper.rs | 9 ++-- sophia/src/dataset/inmem/_hash_dataset.rs | 8 ++-- sophia/src/dataset/inmem/_ogps_wrapper.rs | 9 ++-- sophia/src/dataset/inmem/_wrapper.rs | 1 + sophia/src/graph/indexed.rs | 14 +++++- sophia/src/graph/inmem.rs | 14 +++--- 
sophia/src/graph/inmem/_hash_graph.rs | 2 +- sophia/src/graph/inmem/_ops_wrapper.rs | 7 +-- sophia/src/graph/inmem/_spo_wrapper.rs | 7 +-- sophia/src/graph/inmem/_wrapper.rs | 29 +++++++++--- sophia/src/lib.rs | 30 +++++++------ sophia/src/parser.rs | 4 +- sophia/src/parser/rio_common.rs | 12 ++--- sophia/src/serializer/nq.rs | 2 +- sophia/src/serializer/nt.rs | 2 - 53 files changed, 210 insertions(+), 172 deletions(-) rename {sophia => api}/src/dataset.rs (92%) rename {sophia => api}/src/dataset/_ext_impl.rs (96%) rename {sophia => api}/src/dataset/_isomorphism.rs (98%) rename {sophia => api}/src/dataset/_traits.rs (99%) rename {sophia => api}/src/dataset/adapter.rs (100%) rename {sophia => api}/src/dataset/adapter/_dataset_graph.rs (97%) rename {sophia => api}/src/dataset/adapter/_error.rs (100%) rename {sophia => api}/src/dataset/adapter/_graph_as_dataset.rs (98%) rename {sophia => api}/src/dataset/test.rs (99%) rename {sophia => api}/src/graph.rs (90%) rename {sophia => api}/src/graph/_ext_impl.rs (96%) rename {sophia => api}/src/graph/_isomorphism.rs (98%) rename {sophia => api}/src/graph/_traits.rs (96%) rename {sophia => api}/src/graph/test.rs (99%) rename {sophia => api}/src/quad.rs (99%) rename {sophia => api}/src/quad/stream.rs (100%) rename {sophia => api}/src/quad/stream/_filter.rs (100%) rename {sophia => api}/src/quad/stream/_filter_map.rs (100%) rename {sophia => api}/src/quad/stream/_iterator.rs (100%) rename {sophia => api}/src/quad/stream/_map.rs (100%) rename {sophia => api}/src/quad/stream/test.rs (98%) rename {sophia => api}/src/quad/streaming_mode.rs (99%) rename {sophia => api}/src/quad/streaming_mode/_unsafe_quad.rs (98%) rename {sophia => api}/src/triple.rs (98%) rename {sophia => api}/src/triple/stream.rs (100%) rename {sophia => api}/src/triple/stream/_error.rs (100%) rename {sophia => api}/src/triple/stream/_filter.rs (100%) rename {sophia => api}/src/triple/stream/_filter_map.rs (100%) rename {sophia => 
api}/src/triple/stream/_iterator.rs (100%) rename {sophia => api}/src/triple/stream/_map.rs (100%) rename {sophia => api}/src/triple/stream/test.rs (98%) rename {sophia => api}/src/triple/streaming_mode.rs (99%) rename {sophia => api}/src/triple/streaming_mode/_unsafe_triple.rs (98%) diff --git a/api/Cargo.toml b/api/Cargo.toml index caf7f778..0d703d84 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -13,9 +13,20 @@ keywords = ["rdf", "linked-data", "semantic-web"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = [] +# This feature enables to use the graph and dataset test macros in other crates +test_macro = ["lazy_static"] + [dependencies] sophia_iri = { version = "0.5.3", path = "../iri" } mownstr = "0.1.1" +resiter = "0.4.0" +thiserror = "1.0.15" + +lazy_static = { version = "1.4.0", optional = true } + [dev-dependencies] +lazy_static = { version = "1.4.0" } sophia_iri = { version = "0.5.3", path = "../iri", features = ["test_data"] } diff --git a/sophia/src/dataset.rs b/api/src/dataset.rs similarity index 92% rename from sophia/src/dataset.rs rename to api/src/dataset.rs index ff4e3d4c..1b76d1e6 100644 --- a/sophia/src/dataset.rs +++ b/api/src/dataset.rs @@ -12,9 +12,6 @@ pub mod test; pub mod adapter; -#[macro_use] -pub mod indexed; -pub mod inmem; mod _ext_impl; pub use self::_ext_impl::*; diff --git a/sophia/src/dataset/_ext_impl.rs b/api/src/dataset/_ext_impl.rs similarity index 96% rename from sophia/src/dataset/_ext_impl.rs rename to api/src/dataset/_ext_impl.rs index 8bfd1aee..142bd275 100644 --- a/sophia/src/dataset/_ext_impl.rs +++ b/api/src/dataset/_ext_impl.rs @@ -11,7 +11,7 @@ use super::*; use crate::quad::stream::{AsQuadSource, QuadSource, StreamError, StreamResult}; use crate::quad::streaming_mode::*; use crate::quad::*; -use sophia_api::term::{same_graph_name, term_eq, CopiableTerm, CopyTerm, TTerm}; +use crate::term::{same_graph_name, term_eq, CopiableTerm, CopyTerm, 
TTerm}; impl Dataset for [Q] where @@ -192,14 +192,12 @@ impl SetDataset for HashSet where T: Eq + Hash + Quad { #[cfg(test)] mod test { use super::*; + use crate::ns::*; use crate::quad::TupleQuad; - use sophia_api::ns::*; - use sophia_api::term::test::TestTerm; - use sophia_api::term::SimpleIri; + use crate::term::SimpleIri; #[allow(dead_code)] - type BoxTerm = TestTerm>; - + type BoxTerm = crate::term::test::TestTerm>; static D: [TupleQuad; 3] = [ ([rdf::type_, rdf::type_, rdf::Property], None), ([rdf::Property, rdf::type_, rdfs::Class], None), diff --git a/sophia/src/dataset/_isomorphism.rs b/api/src/dataset/_isomorphism.rs similarity index 98% rename from sophia/src/dataset/_isomorphism.rs rename to api/src/dataset/_isomorphism.rs index 7ab79d27..ff14043e 100644 --- a/sophia/src/dataset/_isomorphism.rs +++ b/api/src/dataset/_isomorphism.rs @@ -1,16 +1,16 @@ //! This module implements check for isomorphic blank node equivalence of RDF //! datasets. //! -//! It is publicly exported to `sophia::dataset`. +//! Its public members are transparently re-exported by its [parent module](../index.html). use crate::dataset::{DQuad, DTerm, Dataset}; use crate::graph::{bn_mapper, hash_if_not_bn, match_ignore_bns}; use crate::quad::Quad; use crate::term::matcher::AnyOrExactlyRef; +use crate::term::{TTerm, TermKind}; use crate::triple::stream::{ SinkError, SinkResult as _, SourceError, SourceResult as _, StreamError, StreamResult, }; -use sophia_api::term::{TTerm, TermKind}; use std::collections::{BTreeSet, HashMap}; use std::error::Error; use std::fmt; @@ -429,6 +429,7 @@ where Ok(objects) } +/* #[cfg(test)] mod test { use super::*; @@ -442,19 +443,19 @@ mod test { @prefix rdf: . @prefix dc: . @prefix foaf: . - + { dc:publisher "Bob" . dc:publisher "Alice" . } - + { _:a foaf:name "Bob" . _:a foaf:mbox . _:a foaf:knows _:b . } - + { _:b foaf:name "Alice" . @@ -465,19 +466,19 @@ mod test { @prefix rdf: . @prefix dc: . @prefix foaf: . - + { dc:publisher "Bob" . 
dc:publisher "Alice" . } - + { _:a2 foaf:name "Bob" . _:a2 foaf:mbox . _:a2 foaf:knows _:b2 . } - + { _:b2 foaf:name "Alice" . @@ -488,19 +489,19 @@ mod test { @prefix rdf: . @prefix dc: . @prefix foaf: . - + { dc:publisher "Bob" . dc:publisher "Alice" . } - + { _:a3 foaf:name "Bob" . _:a3 foaf:mbox . _:a3 foaf:knows _:b3 . } - + { _:c3 foaf:name "Alice" . @@ -525,19 +526,19 @@ mod test { @prefix rdf: . @prefix dc: . @prefix foaf: . - + { dc:publisher "Bob" . dc:publisher "Alice" . } - + { _:a foaf:name "Bob" . _:a foaf:mbox . _:a foaf:knows _:b . } - + { _:b foaf:name "Alice" . @@ -547,7 +548,7 @@ mod test { let nq = r#" "Bob" . "Alice" . - + _:a2 "Bob" . _:a2 . _:a2 _:b2 . @@ -570,20 +571,20 @@ mod test { @prefix rdf: . @prefix dc: . @prefix foaf: . - + { _:bob dc:publisher "Bob" . _:alice dc:publisher "Alice" . } - + _:bob { _:a foaf:name "Bob" . _:a foaf:mbox . _:a foaf:knows _:b . } - + _:alice { _:b foaf:name "Alice" . @@ -594,20 +595,20 @@ mod test { @prefix rdf: . @prefix dc: . @prefix foaf: . - + { _:bob2 dc:publisher "Bob" . _:alice2 dc:publisher "Alice" . } - + _:bob2 { _:a2 foaf:name "Bob" . _:a2 foaf:mbox . _:a2 foaf:knows _:b2 . } - + _:alice2 { _:b2 foaf:name "Alice" . 
@@ -623,3 +624,4 @@ mod test { Ok(()) } } +*/ diff --git a/sophia/src/dataset/_traits.rs b/api/src/dataset/_traits.rs similarity index 99% rename from sophia/src/dataset/_traits.rs rename to api/src/dataset/_traits.rs index dad64b9f..252f8181 100644 --- a/sophia/src/dataset/_traits.rs +++ b/api/src/dataset/_traits.rs @@ -12,9 +12,9 @@ use crate::dataset::adapter::DatasetGraph; use crate::quad::stream::*; use crate::quad::streaming_mode::*; use crate::quad::*; +use crate::term::matcher::*; +use crate::term::{same_graph_name, term_eq, TTerm, TermKind}; use crate::triple::stream::StreamResult; -use sophia_api::term::matcher::*; -use sophia_api::term::{same_graph_name, term_eq, TTerm, TermKind}; use crate::graph::insert_if_absent; @@ -58,7 +58,7 @@ pub trait Dataset { /// The result of this method is an iterator, /// so it can be used in a `for` loop: /// ``` - /// # use sophia::dataset::Dataset; + /// # use sophia_api::dataset::Dataset; /// # use sophia_api::term::simple_iri::SimpleIri; /// # fn foo() -> Result<(), std::convert::Infallible> { /// # let dataset = Vec::<[SimpleIri;4]>::new(); @@ -74,9 +74,9 @@ pub trait Dataset { /// [`QuadSource`](../quad/stream/trait.QuadSource.html), /// for example: /// ``` - /// # use sophia::dataset::Dataset; + /// # use sophia_api::dataset::Dataset; + /// # use sophia_api::quad::stream::QuadSource; /// # use sophia_api::term::simple_iri::SimpleIri; - /// # use sophia::quad::stream::QuadSource; /// # fn foo() -> Result<(), std::convert::Infallible> { /// # let dataset = Vec::<[SimpleIri;4]>::new(); /// dataset.quads().for_each_quad(|q| { diff --git a/sophia/src/dataset/adapter.rs b/api/src/dataset/adapter.rs similarity index 100% rename from sophia/src/dataset/adapter.rs rename to api/src/dataset/adapter.rs diff --git a/sophia/src/dataset/adapter/_dataset_graph.rs b/api/src/dataset/adapter/_dataset_graph.rs similarity index 97% rename from sophia/src/dataset/adapter/_dataset_graph.rs rename to 
api/src/dataset/adapter/_dataset_graph.rs index 8d12a0a7..a692239f 100644 --- a/sophia/src/dataset/adapter/_dataset_graph.rs +++ b/api/src/dataset/adapter/_dataset_graph.rs @@ -7,9 +7,9 @@ use resiter::Map; use crate::dataset::{Dataset, MutableDataset, SetDataset}; use crate::graph::*; +use crate::term::matcher::{GraphNameMatcher, ANY}; +use crate::term::TTerm; use crate::triple::streaming_mode::{FromQuad, StreamedTriple}; -use sophia_api::term::matcher::{GraphNameMatcher, ANY}; -use sophia_api::term::TTerm; /// The adapter returned by /// [`Dataset::graph`](../trait.Dataset.html#method.graph) @@ -185,11 +185,11 @@ mod test { use crate::dataset::MDResult; use crate::dataset::*; use crate::quad::stream::QuadSource; + use crate::term::{same_graph_name, SimpleIri, TTerm}; use crate::triple::stream::TripleSource; - use sophia_api::term::{same_graph_name, SimpleIri, TTerm}; use std::collections::HashSet; - type BoxTerm = sophia_api::term::test::TestTerm>; + type BoxTerm = crate::term::test::TestTerm>; type MyQuad = ([BoxTerm; 3], Option); type MyDataset = HashSet; diff --git a/sophia/src/dataset/adapter/_error.rs b/api/src/dataset/adapter/_error.rs similarity index 100% rename from sophia/src/dataset/adapter/_error.rs rename to api/src/dataset/adapter/_error.rs diff --git a/sophia/src/dataset/adapter/_graph_as_dataset.rs b/api/src/dataset/adapter/_graph_as_dataset.rs similarity index 98% rename from sophia/src/dataset/adapter/_graph_as_dataset.rs rename to api/src/dataset/adapter/_graph_as_dataset.rs index f4aba5b5..83732a7d 100644 --- a/sophia/src/dataset/adapter/_graph_as_dataset.rs +++ b/api/src/dataset/adapter/_graph_as_dataset.rs @@ -10,7 +10,7 @@ use resiter::Map; use crate::dataset::*; use crate::graph::{Graph, MutableGraph, SetGraph}; use crate::quad::streaming_mode::{FromTriple, StreamedQuad}; -use sophia_api::term::TTerm; +use crate::term::TTerm; use super::GraphAsDatasetError; @@ -388,14 +388,14 @@ mod test { use crate::dataset::adapter::DatasetGraph; use 
crate::dataset::{Dataset, MutableDataset}; use crate::graph::*; + use crate::ns::{rdf, rdfs}; use crate::triple::stream::TripleSource; - use sophia_api::ns::{rdf, rdfs}; use std::collections::HashSet; use std::convert::Infallible; use std::error::Error; - type BoxTerm = sophia_api::term::test::TestTerm>; - type StaticTerm = sophia_api::term::test::TestTerm<&'static str>; + type BoxTerm = crate::term::test::TestTerm>; + type StaticTerm = crate::term::test::TestTerm<&'static str>; const DG: Option<&'static StaticTerm> = None; diff --git a/sophia/src/dataset/test.rs b/api/src/dataset/test.rs similarity index 99% rename from sophia/src/dataset/test.rs rename to api/src/dataset/test.rs index 24352ab7..41bdc508 100644 --- a/sophia/src/dataset/test.rs +++ b/api/src/dataset/test.rs @@ -4,14 +4,13 @@ use std::fmt::Debug; use crate::dataset::*; use crate::graph::test::*; +use crate::ns::*; use crate::quad::stream::*; use crate::quad::streaming_mode::{QuadStreamingMode, UnsafeQuad}; use crate::quad::*; +use crate::term::test::TestTerm; +use crate::term::{CopiableTerm, CopyTerm}; use lazy_static::lazy_static; -pub use sophia_api; // required when test macro is used in other packages -use sophia_api::ns::*; -use sophia_api::term::test::TestTerm; -use sophia_api::term::{CopiableTerm, CopyTerm}; type StaticTerm = TestTerm<&'static str>; type BoxTerm = TestTerm>; @@ -326,9 +325,9 @@ macro_rules! 
test_dataset_impl { use $crate::dataset::test::*; use $crate::dataset::*; use $crate::graph::test::*; - use self::sophia_api::ns::*; - use self::sophia_api::term::TTerm; - use self::sophia_api::term::matcher::ANY; + use $crate::ns::*; + use $crate::term::TTerm; + use $crate::term::matcher::ANY; #[allow(unused_imports)] use super::*; diff --git a/sophia/src/graph.rs b/api/src/graph.rs similarity index 90% rename from sophia/src/graph.rs rename to api/src/graph.rs index cd8a2a26..7ba6da57 100644 --- a/sophia/src/graph.rs +++ b/api/src/graph.rs @@ -9,10 +9,6 @@ #[macro_use] pub mod test; -#[macro_use] -pub mod indexed; -pub mod inmem; - mod _ext_impl; pub use self::_ext_impl::*; mod _traits; diff --git a/sophia/src/graph/_ext_impl.rs b/api/src/graph/_ext_impl.rs similarity index 96% rename from sophia/src/graph/_ext_impl.rs rename to api/src/graph/_ext_impl.rs index 9485dd68..9da2ed53 100644 --- a/sophia/src/graph/_ext_impl.rs +++ b/api/src/graph/_ext_impl.rs @@ -8,10 +8,10 @@ use std::hash::{BuildHasher, Hash}; use resiter::oks::*; use super::*; +use crate::term::{term_eq, CopiableTerm, CopyTerm, TTerm}; use crate::triple::stream::{AsTripleSource, StreamError, StreamResult, TripleSource}; use crate::triple::streaming_mode::*; use crate::triple::*; -use sophia_api::term::{term_eq, CopiableTerm, CopyTerm, TTerm}; impl Graph for [T] where @@ -165,12 +165,10 @@ impl<'a, T, S: BuildHasher> SetGraph for HashSet where T: Eq + Hash + Trip #[cfg(test)] mod test { use super::*; - use sophia_api::ns::*; - use sophia_api::term::test::TestTerm; - use sophia_api::term::SimpleIri; - + use crate::ns::*; + use crate::term::SimpleIri; #[allow(dead_code)] - type BoxTerm = TestTerm>; + type BoxTerm = crate::term::test::TestTerm>; static G: [[SimpleIri; 3]; 3] = [ [rdf::type_, rdf::type_, rdf::Property], diff --git a/sophia/src/graph/_isomorphism.rs b/api/src/graph/_isomorphism.rs similarity index 98% rename from sophia/src/graph/_isomorphism.rs rename to api/src/graph/_isomorphism.rs 
index 10e74120..f73aee0e 100644 --- a/sophia/src/graph/_isomorphism.rs +++ b/api/src/graph/_isomorphism.rs @@ -1,15 +1,15 @@ //! This module implements check for isomorphic blank node equivalence of RDF //! graphs. //! -//! It is publicly exported to `sophia::graph`. +//! Its public members are transparently re-exported by its [parent module](../index.html). use crate::graph::{GTerm, GTriple, Graph}; +use crate::term::matcher::AnyOrExactlyRef; +use crate::term::{term_hash, TTerm, TermKind}; use crate::triple::stream::{ SinkError, SinkResult as _, SourceError, SourceResult as _, StreamError, StreamResult, }; use crate::triple::Triple; -use sophia_api::term::matcher::AnyOrExactlyRef; -use sophia_api::term::{term_hash, TTerm, TermKind}; use std::collections::{BTreeSet, HashMap}; use std::error::Error; use std::fmt; @@ -463,11 +463,12 @@ where Ok(objects) } +/* #[cfg(test)] mod test { use super::*; use crate::graph::inmem::FastGraph; - use crate::parser::{nt, turtle}; + //use crate::parser::{nt, turtle}; use crate::triple::stream::TripleSource; #[test] @@ -630,7 +631,7 @@ mod test { :members 23 ], _:b4 ; :presidency _:a1, _:a2, _:a3, _:a4 . - + _:a1 :next _:a2 . _:a2 :next _:a3 ; :president :MBachelet . @@ -648,7 +649,7 @@ mod test { :members 23 ], _:b42 ; :presidency _:a12, _:a22, _:a32, _:a42 . - + _:a12 :next _:a22 . _:a22 :next _:a32 ; :president :MBachelet . 
@@ -666,3 +667,4 @@ mod test { Ok(()) } } +*/ diff --git a/sophia/src/graph/_traits.rs b/api/src/graph/_traits.rs similarity index 96% rename from sophia/src/graph/_traits.rs rename to api/src/graph/_traits.rs index dfaeb731..ce077ed9 100644 --- a/sophia/src/graph/_traits.rs +++ b/api/src/graph/_traits.rs @@ -7,11 +7,11 @@ use resiter::filter::*; use resiter::map::*; use crate::dataset::adapter::GraphAsDataset; +use crate::term::matcher::TermMatcher; +use crate::term::{term_eq, TTerm, TermKind}; use crate::triple::stream::*; use crate::triple::streaming_mode::*; use crate::triple::*; -use sophia_api::term::matcher::TermMatcher; -use sophia_api::term::{term_eq, TTerm, TermKind}; use std::convert::Infallible; use std::error::Error; @@ -58,7 +58,7 @@ pub trait Graph { /// The result of this method is an iterator, /// so it can be used in a `for` loop: /// ``` - /// # use sophia::graph::Graph; + /// # use sophia_api::graph::Graph; /// # use sophia_api::term::simple_iri::SimpleIri; /// # fn foo() -> Result<(), std::convert::Infallible> { /// # let graph = Vec::<[SimpleIri;3]>::new(); @@ -74,9 +74,9 @@ pub trait Graph { /// [`TripleSource`](../triple/stream/trait.TripleSource.html), /// for example: /// ``` - /// # use sophia::graph::Graph; + /// # use sophia_api::graph::Graph; /// # use sophia_api::term::simple_iri::SimpleIri; - /// # use sophia::triple::stream::TripleSource; + /// # use sophia_api::triple::stream::TripleSource; /// # fn foo() -> Result<(), std::convert::Infallible> { /// # let graph = Vec::<[SimpleIri;3]>::new(); /// graph.triples().for_each_triple(|t| { @@ -188,12 +188,15 @@ pub trait Graph { /// The special `ANY` matcher can also be used to match anything. 
/// /// ``` - /// # use sophia::graph::{*, inmem::LightGraph}; - /// # use sophia::triple::Triple; + /// # use sophia_api::graph::{Graph, GTerm}; /// # use sophia_api::ns::{Namespace, rdf}; + /// # use sophia_api::triple::Triple; /// # - /// # fn test() -> Result<(), Box> { - /// # let mut graph = LightGraph::new(); + /// # fn test(graph: &G) -> Result<(), Box> + /// # where + /// # G: Graph, + /// # GTerm: std::fmt::Display, + /// # { /// # /// use sophia_api::term::matcher::ANY; /// @@ -211,13 +214,16 @@ pub trait Graph { /// for technical reasons, they must be enclosed in a 1-sized array. /// /// ``` - /// # use sophia::graph::{*, inmem::LightGraph}; - /// # use sophia::triple::Triple; + /// # use sophia_api::graph::{Graph, GTerm}; /// # use sophia_api::ns::rdfs; /// # use sophia_api::term::{TTerm, TermKind::Literal}; + /// # use sophia_api::triple::Triple; /// # - /// # fn test() -> Result<(), Box> { - /// # let mut graph = LightGraph::new(); + /// # fn test(graph: &G) -> Result<(), Box> + /// # where + /// # G: Graph, + /// # GTerm: std::fmt::Display, + /// # { /// # /// use sophia_api::term::matcher::ANY; /// @@ -429,8 +435,7 @@ pub trait MutableGraph: Graph { /// # Usage /// ``` /// # use sophia_api::ns::{Namespace, rdf, rdfs, xsd}; - /// # use sophia::graph::{MutableGraph, MGResult}; - /// # use std::collections::HashSet; + /// # use sophia_api::graph::{MutableGraph, MGResult}; /// /// # fn populate(graph: &mut G) -> MGResult { /// let schema = Namespace::new("http://schema.org/").unwrap(); diff --git a/sophia/src/graph/test.rs b/api/src/graph/test.rs similarity index 99% rename from sophia/src/graph/test.rs rename to api/src/graph/test.rs index 5bd1707d..35147eee 100644 --- a/sophia/src/graph/test.rs +++ b/api/src/graph/test.rs @@ -3,14 +3,13 @@ use std::fmt::Debug; use crate::graph::*; +use crate::ns::*; +use crate::term::test::TestTerm; +use crate::term::CopiableTerm; use crate::triple::stream::*; use crate::triple::streaming_mode::{TripleStreamingMode, 
UnsafeTriple}; use crate::triple::*; use lazy_static::lazy_static; -pub use sophia_api; -use sophia_api::ns::*; -use sophia_api::term::test::TestTerm; -use sophia_api::term::CopiableTerm; type StaticTerm = TestTerm<&'static str>; type BoxTerm = TestTerm>; @@ -293,7 +292,7 @@ macro_rules! test_graph_impl { use $crate::graph::test::*; use $crate::graph::*; use $crate::ns::*; - use self::sophia_api::term::matcher::ANY; + use $crate::term::matcher::ANY; #[allow(unused_imports)] use super::*; diff --git a/api/src/lib.rs b/api/src/lib.rs index a1b37c9e..34fc650d 100644 --- a/api/src/lib.rs +++ b/api/src/lib.rs @@ -3,11 +3,11 @@ //! //! See https://docs.rs/sophia/latest/sophia/ -//pub mod dataset; -//pub mod graph; +pub mod dataset; +pub mod graph; pub mod ns; //pub mod parser; -//pub mod quad; +pub mod quad; //pub mod serializer; pub mod term; -//pub mod triple; +pub mod triple; diff --git a/sophia/src/quad.rs b/api/src/quad.rs similarity index 99% rename from sophia/src/quad.rs rename to api/src/quad.rs index a6a75402..5d47a7cb 100644 --- a/sophia/src/quad.rs +++ b/api/src/quad.rs @@ -4,8 +4,8 @@ //! //! They are the individual statements of an RDF `dataset`(../dataset/index.html). 
+use crate::term::TTerm; use crate::triple::*; -use sophia_api::term::TTerm; pub mod stream; pub mod streaming_mode; diff --git a/sophia/src/quad/stream.rs b/api/src/quad/stream.rs similarity index 100% rename from sophia/src/quad/stream.rs rename to api/src/quad/stream.rs diff --git a/sophia/src/quad/stream/_filter.rs b/api/src/quad/stream/_filter.rs similarity index 100% rename from sophia/src/quad/stream/_filter.rs rename to api/src/quad/stream/_filter.rs diff --git a/sophia/src/quad/stream/_filter_map.rs b/api/src/quad/stream/_filter_map.rs similarity index 100% rename from sophia/src/quad/stream/_filter_map.rs rename to api/src/quad/stream/_filter_map.rs diff --git a/sophia/src/quad/stream/_iterator.rs b/api/src/quad/stream/_iterator.rs similarity index 100% rename from sophia/src/quad/stream/_iterator.rs rename to api/src/quad/stream/_iterator.rs diff --git a/sophia/src/quad/stream/_map.rs b/api/src/quad/stream/_map.rs similarity index 100% rename from sophia/src/quad/stream/_map.rs rename to api/src/quad/stream/_map.rs diff --git a/sophia/src/quad/stream/test.rs b/api/src/quad/stream/test.rs similarity index 98% rename from sophia/src/quad/stream/test.rs rename to api/src/quad/stream/test.rs index 1accd798..b095306c 100644 --- a/sophia/src/quad/stream/test.rs +++ b/api/src/quad/stream/test.rs @@ -1,11 +1,11 @@ use super::*; use crate::dataset::Dataset; +use crate::ns::{rdf, xsd}; use crate::quad::Quad; +use crate::term::test::TestTerm; +use crate::term::CopiableTerm; use crate::triple::stream::TripleSource; use lazy_static::lazy_static; -use sophia_api::ns::{rdf, xsd}; -use sophia_api::term::test::TestTerm; -use sophia_api::term::CopiableTerm; type BoxTerm = TestTerm>; type StaticTerm = TestTerm<&'static str>; diff --git a/sophia/src/quad/streaming_mode.rs b/api/src/quad/streaming_mode.rs similarity index 99% rename from sophia/src/quad/streaming_mode.rs rename to api/src/quad/streaming_mode.rs index daa881fc..ecfd7af9 100644 --- 
a/sophia/src/quad/streaming_mode.rs +++ b/api/src/quad/streaming_mode.rs @@ -7,7 +7,7 @@ use std::marker::PhantomData; use std::ptr::NonNull; use crate::quad::Quad; -use sophia_api::term::TTerm; +use crate::term::TTerm; mod _unsafe_quad; pub(crate) use _unsafe_quad::*; diff --git a/sophia/src/quad/streaming_mode/_unsafe_quad.rs b/api/src/quad/streaming_mode/_unsafe_quad.rs similarity index 98% rename from sophia/src/quad/streaming_mode/_unsafe_quad.rs rename to api/src/quad/streaming_mode/_unsafe_quad.rs index 2bae3ab1..765ce3e6 100644 --- a/sophia/src/quad/streaming_mode/_unsafe_quad.rs +++ b/api/src/quad/streaming_mode/_unsafe_quad.rs @@ -3,7 +3,7 @@ use std::ptr::NonNull; use crate::quad::Quad; -use sophia_api::term::TTerm; +use crate::term::TTerm; pub trait UnsafeQuad { type Term: TTerm + ?Sized; diff --git a/api/src/term.rs b/api/src/term.rs index 44b2d1ad..461db74d 100644 --- a/api/src/term.rs +++ b/api/src/term.rs @@ -444,5 +444,5 @@ where } } -//#[cfg(test)] +#[cfg(any(test, feature = "test_macro"))] pub mod test; diff --git a/sophia/src/triple.rs b/api/src/triple.rs similarity index 98% rename from sophia/src/triple.rs rename to api/src/triple.rs index 7429454f..ea964e82 100644 --- a/sophia/src/triple.rs +++ b/api/src/triple.rs @@ -13,8 +13,7 @@ //! 
use crate::quad::Quad; -use sophia_api::term::TTerm; -use sophia_term::*; +use crate::term::TTerm; pub mod stream; pub mod streaming_mode; diff --git a/sophia/src/triple/stream.rs b/api/src/triple/stream.rs similarity index 100% rename from sophia/src/triple/stream.rs rename to api/src/triple/stream.rs diff --git a/sophia/src/triple/stream/_error.rs b/api/src/triple/stream/_error.rs similarity index 100% rename from sophia/src/triple/stream/_error.rs rename to api/src/triple/stream/_error.rs diff --git a/sophia/src/triple/stream/_filter.rs b/api/src/triple/stream/_filter.rs similarity index 100% rename from sophia/src/triple/stream/_filter.rs rename to api/src/triple/stream/_filter.rs diff --git a/sophia/src/triple/stream/_filter_map.rs b/api/src/triple/stream/_filter_map.rs similarity index 100% rename from sophia/src/triple/stream/_filter_map.rs rename to api/src/triple/stream/_filter_map.rs diff --git a/sophia/src/triple/stream/_iterator.rs b/api/src/triple/stream/_iterator.rs similarity index 100% rename from sophia/src/triple/stream/_iterator.rs rename to api/src/triple/stream/_iterator.rs diff --git a/sophia/src/triple/stream/_map.rs b/api/src/triple/stream/_map.rs similarity index 100% rename from sophia/src/triple/stream/_map.rs rename to api/src/triple/stream/_map.rs diff --git a/sophia/src/triple/stream/test.rs b/api/src/triple/stream/test.rs similarity index 98% rename from sophia/src/triple/stream/test.rs rename to api/src/triple/stream/test.rs index 457f5c07..527e9487 100644 --- a/sophia/src/triple/stream/test.rs +++ b/api/src/triple/stream/test.rs @@ -1,11 +1,11 @@ use super::*; use crate::graph::Graph; +use crate::ns::{rdf, xsd}; use crate::quad::stream::QuadSource; +use crate::term::test::TestTerm; +use crate::term::CopiableTerm; use crate::triple::Triple; use lazy_static::lazy_static; -use sophia_api::ns::{rdf, xsd}; -use sophia_api::term::test::TestTerm; -use sophia_api::term::CopiableTerm; type BoxTerm = TestTerm>; type StaticTerm = 
TestTerm<&'static str>; diff --git a/sophia/src/triple/streaming_mode.rs b/api/src/triple/streaming_mode.rs similarity index 99% rename from sophia/src/triple/streaming_mode.rs rename to api/src/triple/streaming_mode.rs index 8e8ca630..d617770a 100644 --- a/sophia/src/triple/streaming_mode.rs +++ b/api/src/triple/streaming_mode.rs @@ -70,8 +70,8 @@ use std::marker::PhantomData; use std::ptr::NonNull; +use crate::term::TTerm; use crate::triple::Triple; -use sophia_api::term::TTerm; mod _unsafe_triple; pub(crate) use _unsafe_triple::*; diff --git a/sophia/src/triple/streaming_mode/_unsafe_triple.rs b/api/src/triple/streaming_mode/_unsafe_triple.rs similarity index 98% rename from sophia/src/triple/streaming_mode/_unsafe_triple.rs rename to api/src/triple/streaming_mode/_unsafe_triple.rs index 72e244f9..af7ef1bd 100644 --- a/sophia/src/triple/streaming_mode/_unsafe_triple.rs +++ b/api/src/triple/streaming_mode/_unsafe_triple.rs @@ -2,8 +2,8 @@ use std::ptr::NonNull; +use crate::term::TTerm; use crate::triple::Triple; -use sophia_api::term::TTerm; pub trait UnsafeTriple { type Term: TTerm + ?Sized; diff --git a/sophia/Cargo.toml b/sophia/Cargo.toml index c488b60a..4858d1e0 100644 --- a/sophia/Cargo.toml +++ b/sophia/Cargo.toml @@ -18,7 +18,7 @@ default = [] # This feature enables the RDF/XML parser xml = ["lazy_static", "percent-encoding", "quick-xml", "sophia_iri", "regex", "url"] # This feature enables to use the graph and dataset test macros in other crates -test_macro = ["lazy_static"] +test_macro = ["sophia_api/test_macro"] # This feature enables a bunch of tests which, otherwise, make compilation really slow all_tests = [] @@ -40,3 +40,5 @@ url = { version = "2.1.1", optional = true } [dev-dependencies] test-case = "1.0.0" lazy_static = "1.4.0" +sophia_api = { version = "0.5.3", path = "../api", features = ["test_macro"] } + diff --git a/sophia/src/dataset/inmem.rs b/sophia/src/dataset/inmem.rs index 60b77e67..bde54584 100644 --- a/sophia/src/dataset/inmem.rs +++ 
b/sophia/src/dataset/inmem.rs @@ -17,9 +17,9 @@ //! [`FastDataset`]: type.FastDataset.html //! [`LightDataset`]: type.LightDataset.html -use super::_traits::*; use super::indexed::*; use crate::graph::inmem::TermIndexMapU; +use sophia_api::dataset::{CollectibleDataset, Dataset, MutableDataset, SetDataset}; use sophia_term::{factory::*, *}; #[macro_use] @@ -53,10 +53,10 @@ pub type FastDataset = FastWrapper>; pub type LightDataset = GenericDataset; #[cfg(test)] -test_dataset_impl!(test_fastd, FastDataset); +sophia_api::test_dataset_impl!(test_fastd, FastDataset); #[cfg(all(test, feature = "all_tests"))] -test_dataset_impl!(test_lightd, LightDataset); +sophia_api::test_dataset_impl!(test_lightd, LightDataset); /// Flavours of Dataset implementations with a smaller memory-footprint. /// @@ -73,9 +73,9 @@ pub mod small { pub type LightDataset = GenericDataset; #[cfg(all(test, feature = "all_tests"))] - test_dataset_impl!(test_fastd, FastDataset); + sophia_api::test_dataset_impl!(test_fastd, FastDataset); #[cfg(all(test, feature = "all_tests"))] - test_dataset_impl!(test_lightd, LightDataset); + sophia_api::test_dataset_impl!(test_lightd, LightDataset); } /// Flavours of Dataset implementations which are safe to share across threads. 
@@ -90,7 +90,7 @@ pub mod sync { pub type LightDataset = GenericDataset; #[cfg(all(test, feature = "all_tests"))] - test_dataset_impl!(test_fastd, FastDataset); + sophia_api::test_dataset_impl!(test_fastd, FastDataset); #[cfg(all(test, feature = "all_tests"))] - test_dataset_impl!(test_lightd, LightDataset); + sophia_api::test_dataset_impl!(test_lightd, LightDataset); } diff --git a/sophia/src/dataset/inmem/_gspo_wrapper.rs b/sophia/src/dataset/inmem/_gspo_wrapper.rs index 377e097e..1dc879ac 100644 --- a/sophia/src/dataset/inmem/_gspo_wrapper.rs +++ b/sophia/src/dataset/inmem/_gspo_wrapper.rs @@ -6,7 +6,8 @@ use std::iter::empty; use super::*; use crate::graph::indexed::*; -use crate::quad::streaming_mode::{ByTermRefs, StreamedQuad}; +use sophia_api::dataset::{DQuadSource, DResultTermSet}; +use sophia_api::quad::streaming_mode::{ByTermRefs, StreamedQuad}; use sophia_api::term::TTerm; /// A [`DatasetWrapper`](trait.DatasetWrapper.html) @@ -208,14 +209,14 @@ impl CollectibleDataset for GspoWrapper where T: IndexedDataset + Dataset::TermData>>>, { - impl_collectible_dataset_for_indexed_dataset!(); + crate::impl_collectible_dataset_for_indexed_dataset!(); } impl MutableDataset for GspoWrapper where T: IndexedDataset + Dataset::TermData>>>, { - impl_mutable_dataset_for_indexed_dataset!(); + crate::impl_mutable_dataset_for_indexed_dataset!(); } impl SetDataset for GspoWrapper @@ -228,4 +229,4 @@ where #[cfg(all(test, feature = "all_tests"))] type GspoDataset = GspoWrapper; #[cfg(all(test, feature = "all_tests"))] -test_dataset_impl!(GspoDataset); +sophia_api::test_dataset_impl!(GspoDataset); diff --git a/sophia/src/dataset/inmem/_hash_dataset.rs b/sophia/src/dataset/inmem/_hash_dataset.rs index 42631120..095215fb 100644 --- a/sophia/src/dataset/inmem/_hash_dataset.rs +++ b/sophia/src/dataset/inmem/_hash_dataset.rs @@ -5,10 +5,10 @@ use std::hash::Hash; use crate::dataset::indexed::IndexedDataset; use crate::dataset::*; -use crate::quad::stream::QuadSource; -use 
crate::quad::streaming_mode::{ByTermRefs, StreamedQuad}; -use crate::triple::stream::StreamResult; +use sophia_api::quad::stream::QuadSource; +use sophia_api::quad::streaming_mode::{ByTermRefs, StreamedQuad}; use sophia_api::term::TTerm; +use sophia_api::triple::stream::StreamResult; use sophia_term::factory::TermFactory; use sophia_term::index_map::TermIndexMap; use sophia_term::*; @@ -216,7 +216,7 @@ where I::Index: Hash, ::TermData: 'static, { - impl_mutable_dataset_for_indexed_dataset!(); + crate::impl_mutable_dataset_for_indexed_dataset!(); } impl SetDataset for HashDataset diff --git a/sophia/src/dataset/inmem/_ogps_wrapper.rs b/sophia/src/dataset/inmem/_ogps_wrapper.rs index 0ac0a2f1..ed1af461 100644 --- a/sophia/src/dataset/inmem/_ogps_wrapper.rs +++ b/sophia/src/dataset/inmem/_ogps_wrapper.rs @@ -5,7 +5,8 @@ use std::iter::empty; use super::*; use crate::graph::indexed::*; -use crate::quad::streaming_mode::{ByTermRefs, StreamedQuad}; +use sophia_api::dataset::{DQuadSource, DResultTermSet}; +use sophia_api::quad::streaming_mode::{ByTermRefs, StreamedQuad}; use sophia_api::term::TTerm; /// A [`DatasetWrapper`](trait.DatasetWrapper.html) @@ -207,14 +208,14 @@ impl CollectibleDataset for OgpsWrapper where T: IndexedDataset + Dataset::TermData>>>, { - impl_collectible_dataset_for_indexed_dataset!(); + crate::impl_collectible_dataset_for_indexed_dataset!(); } impl MutableDataset for OgpsWrapper where T: IndexedDataset + Dataset::TermData>>>, { - impl_mutable_dataset_for_indexed_dataset!(); + crate::impl_mutable_dataset_for_indexed_dataset!(); } impl SetDataset for OgpsWrapper @@ -227,4 +228,4 @@ where #[cfg(all(test, feature = "all_tests"))] type GspoDataset = OgpsWrapper; #[cfg(all(test, feature = "all_tests"))] -test_dataset_impl!(GspoDataset); +sophia_api::test_dataset_impl!(GspoDataset); diff --git a/sophia/src/dataset/inmem/_wrapper.rs b/sophia/src/dataset/inmem/_wrapper.rs index 9b954dd2..1d0b7ec0 100644 --- a/sophia/src/dataset/inmem/_wrapper.rs +++ 
b/sophia/src/dataset/inmem/_wrapper.rs @@ -2,6 +2,7 @@ use super::*; use crate::dataset::indexed::IndexedDataset; +use sophia_api::dataset::{DQuadSource, DResult, DResultTermSet, DTerm}; use sophia_api::term::TTerm; use std::hash::Hash; diff --git a/sophia/src/graph/indexed.rs b/sophia/src/graph/indexed.rs index 19d7c184..faa16ee0 100644 --- a/sophia/src/graph/indexed.rs +++ b/sophia/src/graph/indexed.rs @@ -97,7 +97,12 @@ macro_rules! impl_mutable_graph_for_indexed_graph { () => { type MutationError = std::convert::Infallible; - fn insert(&mut self, s: &TS_, p: &TP_, o: &TO_) -> MGResult + fn insert( + &mut self, + s: &TS_, + p: &TP_, + o: &TO_, + ) -> $crate::graph::MGResult where TS_: sophia_api::term::TTerm + ?Sized, TP_: sophia_api::term::TTerm + ?Sized, @@ -105,7 +110,12 @@ macro_rules! impl_mutable_graph_for_indexed_graph { { Ok(self.insert_indexed(s, p, o).is_some()) } - fn remove(&mut self, s: &TS_, p: &TP_, o: &TO_) -> MGResult + fn remove( + &mut self, + s: &TS_, + p: &TP_, + o: &TO_, + ) -> $crate::graph::MGResult where TS_: sophia_api::term::TTerm + ?Sized, TP_: sophia_api::term::TTerm + ?Sized, diff --git a/sophia/src/graph/inmem.rs b/sophia/src/graph/inmem.rs index 51769a37..a62a83e9 100644 --- a/sophia/src/graph/inmem.rs +++ b/sophia/src/graph/inmem.rs @@ -32,8 +32,8 @@ //! [`FastGraph`]: type.FastGraph.html //! [`LightGraph`]: type.LightGraph.html -use super::_traits::*; use super::indexed::*; +use sophia_api::graph::{CollectibleGraph, Graph, MutableGraph, SetGraph}; use sophia_term::factory::*; use sophia_term::*; @@ -70,9 +70,9 @@ pub type FastGraph = FastWrapper>; pub type LightGraph = GenericGraph; #[cfg(test)] -test_graph_impl!(test_fastg, FastGraph); +sophia_api::test_graph_impl!(test_fastg, FastGraph); #[cfg(all(test, feature = "all_tests"))] -test_graph_impl!(test_lightg, LightGraph); +sophia_api::test_graph_impl!(test_lightg, LightGraph); /// Flavors of Graph implementations with a smaller memory-footprint. 
/// @@ -89,9 +89,9 @@ pub mod small { pub type LightGraph = GenericGraph; #[cfg(all(test, feature = "all_tests"))] - test_graph_impl!(test_fastg, FastGraph); + sophia_api::test_graph_impl!(test_fastg, FastGraph); #[cfg(all(test, feature = "all_tests"))] - test_graph_impl!(test_lightg, LightGraph); + sophia_api::test_graph_impl!(test_lightg, LightGraph); } /// Flavors of Graph implementations which are safe to share across threads. @@ -106,7 +106,7 @@ pub mod sync { pub type LightGraph = GenericGraph; #[cfg(all(test, feature = "all_tests"))] - test_graph_impl!(test_fastg, FastGraph); + sophia_api::test_graph_impl!(test_fastg, FastGraph); #[cfg(all(test, feature = "all_tests"))] - test_graph_impl!(test_lightg, LightGraph); + sophia_api::test_graph_impl!(test_lightg, LightGraph); } diff --git a/sophia/src/graph/inmem/_hash_graph.rs b/sophia/src/graph/inmem/_hash_graph.rs index 49c44324..6af42497 100644 --- a/sophia/src/graph/inmem/_hash_graph.rs +++ b/sophia/src/graph/inmem/_hash_graph.rs @@ -180,7 +180,7 @@ where I::Index: Hash, ::TermData: 'static, { - impl_mutable_graph_for_indexed_graph!(); + crate::impl_mutable_graph_for_indexed_graph!(); } impl SetGraph for HashGraph diff --git a/sophia/src/graph/inmem/_ops_wrapper.rs b/sophia/src/graph/inmem/_ops_wrapper.rs index 86900532..32000fd2 100644 --- a/sophia/src/graph/inmem/_ops_wrapper.rs +++ b/sophia/src/graph/inmem/_ops_wrapper.rs @@ -5,6 +5,7 @@ use std::iter::empty; use super::*; use crate::triple::streaming_mode::{ByTermRefs, StreamedTriple}; +use sophia_api::graph::{GResultTermSet, GTripleSource}; use sophia_api::term::TTerm; /// A [`GraphWrapper`](trait.GraphWrapper.html) @@ -160,14 +161,14 @@ impl CollectibleGraph for OpsWrapper where T: IndexedGraph + Graph::TermData>>>, { - impl_collectible_graph_for_indexed_graph!(); + crate::impl_collectible_graph_for_indexed_graph!(); } impl MutableGraph for OpsWrapper where T: IndexedGraph + Graph::TermData>>>, { - impl_mutable_graph_for_indexed_graph!(); + 
crate::impl_mutable_graph_for_indexed_graph!(); } impl SetGraph for OpsWrapper @@ -180,4 +181,4 @@ where #[cfg(all(test, feature = "all_tests"))] type OpsGraph = OpsWrapper; #[cfg(all(test, feature = "all_tests"))] -test_graph_impl!(OpsGraph); +sophia_api::test_graph_impl!(OpsGraph); diff --git a/sophia/src/graph/inmem/_spo_wrapper.rs b/sophia/src/graph/inmem/_spo_wrapper.rs index f002ad9d..152a6a8c 100644 --- a/sophia/src/graph/inmem/_spo_wrapper.rs +++ b/sophia/src/graph/inmem/_spo_wrapper.rs @@ -5,6 +5,7 @@ use std::iter::empty; use super::*; use crate::triple::streaming_mode::{ByTermRefs, StreamedTriple}; +use sophia_api::graph::{GResultTermSet, GTripleSource}; use sophia_api::term::TTerm; /// A [`GraphWrapper`](trait.GraphWrapper.html) @@ -160,14 +161,14 @@ impl CollectibleGraph for SpoWrapper where T: IndexedGraph + Graph::TermData>>>, { - impl_collectible_graph_for_indexed_graph!(); + crate::impl_collectible_graph_for_indexed_graph!(); } impl MutableGraph for SpoWrapper where T: IndexedGraph + Graph::TermData>>>, { - impl_mutable_graph_for_indexed_graph!(); + crate::impl_mutable_graph_for_indexed_graph!(); } impl SetGraph for SpoWrapper @@ -180,4 +181,4 @@ where #[cfg(all(test, feature = "all_tests"))] type SpoGraph = super::SpoWrapper; #[cfg(all(test, feature = "all_tests"))] -test_graph_impl!(SpoGraph); +sophia_api::test_graph_impl!(SpoGraph); diff --git a/sophia/src/graph/inmem/_wrapper.rs b/sophia/src/graph/inmem/_wrapper.rs index bda03277..7bd2988c 100644 --- a/sophia/src/graph/inmem/_wrapper.rs +++ b/sophia/src/graph/inmem/_wrapper.rs @@ -1,6 +1,7 @@ // this module is transparently re-exported by its parent `graph::inmem` use super::*; +use sophia_api::graph::{GResult, GResultTermSet, GTerm, GTripleSource}; use sophia_api::term::TTerm; use std::hash::Hash; @@ -305,7 +306,9 @@ macro_rules! 
impl_graph_for_wrapper { } #[inline] - fn subjects(&self) -> GResult>> + fn subjects( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { @@ -313,7 +316,9 @@ macro_rules! impl_graph_for_wrapper { } #[inline] - fn predicates(&self) -> GResult>> + fn predicates( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { @@ -321,7 +326,9 @@ macro_rules! impl_graph_for_wrapper { } #[inline] - fn objects(&self) -> GResult>> + fn objects( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { @@ -329,7 +336,9 @@ macro_rules! impl_graph_for_wrapper { } #[inline] - fn iris(&self) -> GResult>> + fn iris( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { @@ -337,7 +346,9 @@ macro_rules! impl_graph_for_wrapper { } #[inline] - fn bnodes(&self) -> GResult>> + fn bnodes( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { @@ -345,7 +356,9 @@ macro_rules! impl_graph_for_wrapper { } #[inline] - fn literals(&self) -> GResult>> + fn literals( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { @@ -353,7 +366,9 @@ macro_rules! impl_graph_for_wrapper { } #[inline] - fn variables(&self) -> GResult>> + fn variables( + &self, + ) -> $crate::graph::GResult>> where $crate::graph::GTerm: Clone + Eq + std::hash::Hash, { diff --git a/sophia/src/lib.rs b/sophia/src/lib.rs index e3451aee..5422320b 100644 --- a/sophia/src/lib.rs +++ b/sophia/src/lib.rs @@ -63,27 +63,29 @@ //! # Ok::<(), Box>(()) //! ``` -pub mod dataset; -pub mod graph; pub mod parser; -pub mod quad; pub mod query; pub mod serializer; -pub mod triple; -/// This module re-exorts things from `sophia_term`, -/// to ease transition from older versions of Sophia. -/// It will eventually be deprecated. 
-/// -/// See [`sophia_term::ns`](https://docs.rs/sophia_term/latest/sophia_term/ns/index.html) +pub mod dataset { + pub use sophia_api::dataset::*; + pub mod indexed; + pub mod inmem; +} +pub mod graph { + pub use sophia_api::graph::*; + pub mod indexed; + pub mod inmem; +} pub mod ns { pub use sophia_api::ns::*; } -/// This module re-exorts things from `sophia_term`, -/// to ease transition from older versions of Sophia. -/// It will eventually be deprecated. -/// -/// See [`sophia_term`](https://docs.rs/sophia_term/latest/sophia_term/) +pub mod quad { + pub use sophia_api::quad::*; +} pub mod term { pub use sophia_term::*; } +pub mod triple { + pub use sophia_api::triple::*; +} diff --git a/sophia/src/parser.rs b/sophia/src/parser.rs index 65919466..07f4e849 100644 --- a/sophia/src/parser.rs +++ b/sophia/src/parser.rs @@ -1,7 +1,7 @@ //! API for parsing RDF syntaxes. -use crate::quad::stream::QuadSource; -use crate::triple::stream::TripleSource; +use sophia_api::quad::stream::QuadSource; +use sophia_api::triple::stream::TripleSource; mod _location; pub use _location::*; diff --git a/sophia/src/parser/rio_common.rs b/sophia/src/parser/rio_common.rs index 3ba2b132..df4811e1 100644 --- a/sophia/src/parser/rio_common.rs +++ b/sophia/src/parser/rio_common.rs @@ -6,10 +6,10 @@ use std::result::Result as StdResult; use rio_api::model::*; use rio_api::parser::*; -use crate::quad::stream::*; -use crate::quad::streaming_mode::StreamedQuad; -use crate::triple::stream::*; -use crate::triple::streaming_mode::StreamedTriple; +use sophia_api::quad::stream::*; +use sophia_api::quad::streaming_mode::StreamedQuad; +use sophia_api::triple::stream::*; +use sophia_api::triple::streaming_mode::StreamedTriple; use sophia_term::literal::convert::AsLiteral; use sophia_term::{BoxTerm, RefTerm}; @@ -66,7 +66,7 @@ where } pub type RioSourceTriple<'a> = [RefTerm<'a>; 3]; -crate::make_scoped_triple_streaming_mode!(ScopedRioSourceTriple, RioSourceTriple); 
+sophia_api::make_scoped_triple_streaming_mode!(ScopedRioSourceTriple, RioSourceTriple); impl TripleSource for StrictRioSource where @@ -105,7 +105,7 @@ where } pub type RioSourceQuad<'a> = ([RefTerm<'a>; 3], Option>); -crate::make_scoped_quad_streaming_mode!(ScopedRioSourceQuad, RioSourceQuad); +sophia_api::make_scoped_quad_streaming_mode!(ScopedRioSourceQuad, RioSourceQuad); impl QuadSource for StrictRioSource where diff --git a/sophia/src/serializer/nq.rs b/sophia/src/serializer/nq.rs index 7e05e099..9caf5556 100644 --- a/sophia/src/serializer/nq.rs +++ b/sophia/src/serializer/nq.rs @@ -11,7 +11,7 @@ use std::io; -use crate::quad::{stream::*, Quad}; +use sophia_api::quad::{stream::*, Quad}; use super::nt::write_term; use super::*; diff --git a/sophia/src/serializer/nt.rs b/sophia/src/serializer/nt.rs index 75b24922..e74c7baf 100644 --- a/sophia/src/serializer/nt.rs +++ b/sophia/src/serializer/nt.rs @@ -13,8 +13,6 @@ use sophia_api::ns::xsd; use sophia_api::term::{TTerm, TermKind}; use std::io; -use crate::triple::stream::*; - use super::*; /// N-Triples serializer configuration. From dd34345f89c2179b8f2a11856ff7222f87aa8579 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Wed, 15 Jul 2020 19:22:51 +0200 Subject: [PATCH 08/11] adapt and improve isomorphism algorithms the new version of the algorithm is no longer limited by a MAX_DISTANCE parameter. It is still heuristic and may generate false positives, in the pathological case of multiple locally-identical bnodes. They will never return false negatives, though. --- api/src/dataset/_isomorphism.rs | 763 ++++++++++++++----------------- api/src/graph/_isomorphism.rs | 785 +++++++++++++++----------------- 2 files changed, 711 insertions(+), 837 deletions(-) diff --git a/api/src/dataset/_isomorphism.rs b/api/src/dataset/_isomorphism.rs index ff14043e..85139c4e 100644 --- a/api/src/dataset/_isomorphism.rs +++ b/api/src/dataset/_isomorphism.rs @@ -4,16 +4,14 @@ //! 
Its public member are transparently re-exported by its [parent module](../index.html). use crate::dataset::{DQuad, DTerm, Dataset}; -use crate::graph::{bn_mapper, hash_if_not_bn, match_ignore_bns}; +use crate::graph::{hash_if_not_bn, match_ignore_bns}; use crate::quad::Quad; use crate::term::matcher::AnyOrExactlyRef; use crate::term::{TTerm, TermKind}; use crate::triple::stream::{ SinkError, SinkResult as _, SourceError, SourceResult as _, StreamError, StreamResult, }; -use std::collections::{BTreeSet, HashMap}; -use std::error::Error; -use std::fmt; +use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; /// Maximal steps a dataset is traversed for proofing isomorphism. @@ -24,36 +22,33 @@ pub const MAX_DISTANCE: usize = 8; /// The hasher used internally for checking isomorphism. pub type IsoHasher = std::collections::hash_map::DefaultHasher; -#[derive(Debug, Clone, Copy)] -struct AlgorithmFailure; - -impl fmt::Display for AlgorithmFailure { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Failed to execute the Algorithm") - } -} - -impl Error for AlgorithmFailure {} - /// Checks if both datasets are isomorphic blank node equal. /// /// According to the [RDF specs](https://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/#graph-isomorphism) /// this means that a mapping for blank nodes in `d1` exists so that `d1 == d2`. /// -/// The used algorithm was originally implemented for [`Oxigraph`](https://github.com/Tpt/oxigraph) +/// The algorithm is inspired from a similar one in [`Oxigraph`](https://github.com/Tpt/oxigraph) /// and is extended for the generalized RDF model of `sophia`. /// -/// # Errors +/// # Performance /// -/// Both datasets may fail traversing (and this is done several times). 
-/// Accordingly, a `StreamError` returned where `SourceError`s originate from -/// `d1` and `SinkError`s originate from `d2` +/// As this algorithm has to enumerate the quads of each dataset several times, +/// the algorithm gets more expensive with bigger numbers of quads. +/// In the same way the number of blank nodes contributes to the cost. /// -/// # Performance +/// Note however that the algorithm uses some heuristics, +/// to avoid the combinatorial explosion of trying every possible bnode-pairing. +/// As a result, it is not 100% accurate (see below). +/// +/// # Accuracy +/// +/// If `d1` and `d2` are isomorphic, the function will always return `true`. /// -/// As this algorithm has to traverse each graph several times the algorithm -/// gets way more expensive with bigger numbers of quads. In the same way -/// the number of blank nodes contributes to the costs. +/// If they are not isomorphic, the function will generally return `false`, +/// but a few pathological cases may be false positives +/// (*i.e.* recognized as isomorphic while they are not). +/// +/// See [`isomorphic_graphs`](../graph/fn.isomorphic_graphs.html) for pathological examples. pub fn isomorphic_datasets(d1: &D1, d2: &D2) -> StreamResult where D1: Dataset, @@ -88,97 +83,32 @@ where // ------------------------------------- // - regardless of blank nodes // - implicitly checks that d1 and d2 have the same length - let d1_in_d2 = check_for_equal_quads_regardless_bns(d1, d2)?; - let d2_in_d1 = check_for_equal_quads_regardless_bns(d2, d1).map_err(StreamError::reverse)?; - - if !(d1_in_d2 && d2_in_d1) { + if !check_for_equal_quads_regardless_bns(d1, d2)? { + return Ok(false); + } + if !check_for_equal_quads_regardless_bns(d2, d1).map_err(StreamError::reverse)?
{ return Ok(false); } // Create hashes - let bn_hashes1 = match calc_bn_hashes::(d1) { + let bn_hashes1 = match calc_bn_hashes::(d1, bns1) { Ok(map) => map, - Err(SourceError(e)) => return Err(SourceError(e)), - Err(SinkError(_)) => return Ok(false), // Not the best solution + Err(e) => return Err(SourceError(e)), }; - let bn_hashes2 = match calc_bn_hashes::(d2) { + let bn_hashes2 = match calc_bn_hashes::(d2, bns2) { Ok(map) => map, - Err(SourceError(e)) => return Err(SinkError(e)), - Err(SinkError(_)) => return Ok(false), // Not the best solution + Err(e) => return Err(SinkError(e)), }; - // Create mapping - let mut bn_mapping = HashMap::new(); + // Check that, for each hash, there are the same number of bnodes in each graph. for (hash, bns1) in bn_hashes1 { - for bn1 in bns1 { - let bn2 = match bn_hashes2.get(&hash) { - Some(bn) => bn, - None => return Ok(false), // No matching blank node in g2! - }; - bn_mapping.insert(bn1, bn2); - } - } - - // Apply mapping - isomorphic_datasets_with_mapping(d1, d2, bn_mapping) -} - -/// Builds a `GraphNameMatcher` by using the blank node mapping provided. -/// -/// If the given term is a blank node the matcher will match all possible -/// mappings for that blank node, i.e. included redundant blank nodes. If the -/// given term is not a blank node the matcher will only match the given term. -fn bn_mapper_for_gname<'m, T1, T2>( - mapping: &'m HashMap>, - g: Option<&'m T1>, -) -> Vec> -where - T1: TTerm + Hash + Eq, - T2: TTerm + Hash + Eq, -{ - if g.map(TTerm::kind) == Some(TermKind::BlankNode) { - match mapping.get(g.unwrap()) { - None => vec![], - Some(bns) => bns.iter().map(|n| Some(n.as_dyn())).collect(), - } - } else { - vec![g.map(TTerm::as_dyn)] - } -} - -/// Checks for each quad in `d1` with at least one blank node if it is also -/// contained in `d2` if the blank node `mapping` is applied. 
-fn isomorphic_datasets_with_mapping( - d1: &D1, - d2: &D2, - mapping: HashMap, &Vec>>, -) -> StreamResult -where - D1: Dataset, - D2: Dataset, - DTerm: Clone + Eq + Hash, - DTerm: Clone + Eq + Hash, -{ - for q in d1.quads() { - let q = q.source_err()?; - - if q.s().kind() == TermKind::BlankNode - || q.p().kind() == TermKind::BlankNode - || q.o().kind() == TermKind::BlankNode - || q.g().map(TTerm::kind) == Some(TermKind::BlankNode) - { - let ms = bn_mapper(&mapping, q.s()); - let mp = bn_mapper(&mapping, q.p()); - let mo = bn_mapper(&mapping, q.o()); - let mg = bn_mapper_for_gname(&mapping, q.g()); - - if d2.quads_matching(&ms, &mp, &mo, &mg).next().is_none() { - return Ok(false); - } + let bns1_len = bns1.len(); + let bns2_len = bn_hashes2.get(&hash).map(|x| x.len()).unwrap_or(0); + if bns1_len != bns2_len { + return Ok(false); // Not the same number of "equivalent" bnodes } } - - Ok(true) + Ok(true) // heuristically } fn match_gname_ignore_bns(g: Option<&T>) -> AnyOrExactlyRef> @@ -219,151 +149,166 @@ where /// Calculate a hash for each blank node. /// -/// The hash of a blank node in a dataset is the hash of all terms in the quads -/// in which the blank node occurs. Should this not be enough to create -/// distinct hashes, the dataset is further traversed starting from the initial -/// quads. -/// -/// Blank nodes are not included in calculating the hashes. +/// We first compute a hash based on all adjacent quads, ignoring bnodes. /// -/// The hashes are distinct if every 'bucket', i.e. the `Vec` in the returning -/// `HashMap` has only one element. -/// -/// An exception are redundant blank nodes. If the algorithm detects such nodes -/// they will share the same hash. +/// If several blank nodes have the same hash, +/// we modify their hash with the hash of their adjacent blank nodes. +/// We repeat this step until either +/// - we reach a point where each blank node has a unique hash, or +/// - the last step didn't change the number of distinct hashes.
+/// At this point, if several blank nodes share the same hash, +/// they must be absolutely redundant. fn calc_bn_hashes( d: &D, -) -> StreamResult>>, D::Error, AlgorithmFailure> + bnodes: HashSet>, +) -> Result>>, D::Error> where D: Dataset, DTerm: Clone + Eq + Hash, H: Hasher + Default, { - let mut res_map = HashMap::new(); - let mut unresolved_map = HashMap::new(); - - for bn in d.bnodes().source_err()?.into_iter() { - let (hash, upstream, downstream) = calc_bns_init_hash::(&bn, d).source_err()?; - unresolved_map - .entry(hash) - .or_insert_with(Vec::new) - .push((bn, upstream, downstream)); + let mut n2h = HashMap::new(); + let mut map = HashMap::new(); + + let n_bnodes = bnodes.len(); + for bn in bnodes { + let (hash, related) = calc_bns_init_hash::(&bn, d)?; + n2h.insert(bn.clone(), hash); + map.entry(hash).or_insert_with(Vec::new).push((bn, related)); } - let mut last_map = unresolved_map; - unresolved_map = HashMap::new(); + let mut len_old_map = 0; - let mut i = 0; + while map.len() < n_bnodes && map.len() != len_old_map { + len_old_map = map.len(); + let last_map = map; + map = HashMap::new(); + let last_n2h = n2h.clone(); - while !last_map.is_empty() && i < MAX_DISTANCE { - for (hash, bns) in last_map.into_iter() { + for (hash, bns) in last_map { if bns.len() == 1 { - // Distinct hash. - let (bn, _, _) = bns.into_iter().next().expect("len == 1"); - res_map.insert(hash, vec![bn]); - } else if bns - .iter() - .all(|(_, upstream, downstream)| upstream.is_empty() && downstream.is_empty()) - { - // Can no longer traverse dataset to distinguish nodes, i.e. they must be redundant. - let redundants = bns.into_iter().map(|(bn, _, _)| bn).collect(); - res_map.insert(hash, redundants); + map.insert(hash, bns); } else { - // improve hash by further traversing. 
- for (bn, upstream, downstream) in bns { - let (better_hash, upstream, downstream) = - improve_hash_by_increasing_distance::( - hash, - &upstream, - &downstream, - d, - ) - .source_err()?; - unresolved_map - .entry(better_hash) + for (bn, related) in bns { + let mut hasher = H::default(); + hash.hash(&mut hasher); + + let mut modifiers = Vec::new(); + for (role, other) in related.iter() { + modifiers.push((role, last_n2h[other])); + } + modifiers.sort_unstable(); // to ensure reproducibility + for (role, hash) in modifiers { + role.hash(&mut hasher); + hash.hash(&mut hasher); + } + let new_hash = hasher.finish(); + *n2h.get_mut(&bn).unwrap() = new_hash; + map.entry(new_hash) .or_insert_with(Vec::new) - .push((bn, upstream, downstream)); + .push((bn, related)); } } } - - last_map = unresolved_map; - unresolved_map = HashMap::new(); - i += 1; + //dbg_map::(&map); } - - if i >= MAX_DISTANCE { - return Err(AlgorithmFailure).sink_err(); + let mut ret = HashMap::with_capacity(map.len()); + for (hash, bns) in map { + let v = bns.into_iter().map(|(bn, _)| bn).collect(); + ret.insert(hash, v); } - - Ok(res_map) + Ok(ret) } -/// Calculate the blank node's initial hash in the dataset. +/// Calculate the blank node's initial hash in the graph, i.e. for distance 0. /// -/// Returns the initial hash, the upstream nodes and the downstream nodes. +/// Returns the initial hash, and a vec of related blank node +/// (associated with an opaque role identifier ) #[allow(clippy::type_complexity)] -fn calc_bns_init_hash( - bn: &DTerm, - d: &D, -) -> Result<(u64, Vec>, Vec>), D::Error> +fn calc_bns_init_hash(bn: &DTerm, d: &D) -> Result<(u64, Vec<(u8, DTerm)>), D::Error> where D: Dataset, DTerm: Clone + Eq + Hash, H: Hasher + Default, { - // for same hashing result we need to order the quads' hashes. 
- let mut hashes = BTreeSet::new(); - - let mut upstream = vec![]; - let mut downstream = vec![]; + let mut quad_hashes = Vec::new(); + let mut related = vec![]; - for quad in d.quads() { + for quad in d.quads_with_s(bn) { let quad = quad?; - if quad.s() == bn || quad.p() == bn || quad.o() == bn || quad.g() == Some(bn) { - hashes.insert(hash_quad_without_bn::>(&quad)); - if quad.o() != bn { - upstream.push(quad.o().clone()) - }; - if quad.s() != bn { - downstream.push(quad.s().clone()) - }; + quad_hashes.push(hash_quad_without_bn::>(&quad)); + let p = quad.p(); + if p.kind() == TermKind::BlankNode && p != bn { + related.push((0, p.clone())); + } + let o = quad.o(); + if o.kind() == TermKind::BlankNode && o != bn { + related.push((1, o.clone())); + } + if let Some(g) = quad.g() { + if g.kind() == TermKind::BlankNode && g != bn { + related.push((2, g.clone())); + } + } + } + for quad in d.quads_with_p(bn) { + let quad = quad?; + quad_hashes.push(hash_quad_without_bn::>(&quad)); + let s = quad.s(); + if s.kind() == TermKind::BlankNode && s != bn { + related.push((3, s.clone())); + } + let o = quad.o(); + if o.kind() == TermKind::BlankNode && o != bn { + related.push((4, o.clone())); + } + if let Some(g) = quad.g() { + if g.kind() == TermKind::BlankNode && g != bn { + related.push((5, g.clone())); + } + } + } + for quad in d.quads_with_o(bn) { + let quad = quad?; + quad_hashes.push(hash_quad_without_bn::>(&quad)); + let s = quad.s(); + if s.kind() == TermKind::BlankNode && s != bn { + related.push((6, s.clone())); + } + let p = quad.p(); + if p.kind() == TermKind::BlankNode && p != bn { + related.push((7, p.clone())); + } + if let Some(g) = quad.g() { + if g.kind() == TermKind::BlankNode && g != bn { + related.push((8, g.clone())); + } + } + } + for quad in d.quads_with_g(Some(bn)) { + let quad = quad?; + quad_hashes.push(hash_quad_without_bn::>(&quad)); + let s = quad.s(); + if s.kind() == TermKind::BlankNode && s != bn { + related.push((9, s.clone())); + } + let p = 
quad.p(); + if p.kind() == TermKind::BlankNode && p != bn { + related.push((10, p.clone())); + } + let o = quad.o(); + if o.kind() == TermKind::BlankNode && o != bn { + related.push((11, o.clone())); } } - // hashing - let mut hasher = H::default(); - hashes.into_iter().for_each(|h| h.hash(&mut hasher)); - - Ok((hasher.finish(), upstream, downstream)) -} - -/// Improves an existing hash by further traversing the dataset. -#[allow(clippy::type_complexity)] -fn improve_hash_by_increasing_distance( - hash: u64, - upstream: &[DTerm], - downstream: &[DTerm], - d: &D, -) -> Result<(u64, Vec>, Vec>), D::Error> -where - D: Dataset, - DTerm: Clone + Eq + Hash, - H: Hasher + Default, -{ - // for same hashing result we need to order the quads' hashes. - let mut hashes = BTreeSet::new(); - - let upstream = traverse_from_s_to_o::(upstream, d, &mut hashes)?; - let downstream = traverse_from_o_to_s::(downstream, d, &mut hashes)?; + quad_hashes.sort_unstable(); // to ensure reproducibility // hashing let mut hasher = H::default(); - // initialize with existing hash. - hash.hash(&mut hasher); - hashes.into_iter().for_each(|h| h.hash(&mut hasher)); + quad_hashes.into_iter().for_each(|h| h.hash(&mut hasher)); - Ok((hasher.finish(), upstream, downstream)) + Ok((hasher.finish(), related)) } fn hash_quad_without_bn(q: &Q) -> u64 @@ -372,256 +317,240 @@ where Q: Quad, { let mut h = H::default(); - hash_if_not_bn(q.s(), &mut h); - hash_if_not_bn(q.p(), &mut h); - hash_if_not_bn(q.o(), &mut h); + hash_if_not_bn(q.s(), 0, &mut h); + hash_if_not_bn(q.p(), 1, &mut h); + hash_if_not_bn(q.o(), 2, &mut h); if let Some(g) = q.g() { - hash_if_not_bn(g, &mut h) + hash_if_not_bn(g, 3, &mut h) } h.finish() } -/// Looks for quads where the given terms are objects. -/// Those quads' hashes are inserted into the list and a list of their -/// subjects is returned. 
-fn traverse_from_o_to_s( - objects: &[DTerm], - d: &D, - hashes: &mut BTreeSet, -) -> Result>, D::Error> -where - D: Dataset, - DTerm: Clone + Eq + Hash, - H: Hasher + Default, -{ - let mut subjects = vec![]; - for o in objects { - for quad in d.quads_with_o(o) { - let quad = quad?; - hashes.insert(hash_quad_without_bn::>(&quad)); - subjects.push(quad.s().clone()); - } - } - Ok(subjects) -} - -/// Looks for quads where the given terms are subjects. -/// Those quads' hashes are inserted into the list and a list of their -/// objects is returned. -fn traverse_from_s_to_o( - subjects: &[DTerm], - d: &D, - hashes: &mut BTreeSet, -) -> Result>, D::Error> -where - D: Dataset, - DTerm: Clone + Eq + Hash, - H: Hasher + Default, -{ - let mut objects = vec![]; - for s in subjects { - for quad in d.quads_with_s(s) { - let quad = quad?; - hashes.insert(hash_quad_without_bn::>(&quad)); - objects.push(quad.o().clone()); - } - } - Ok(objects) -} - -/* #[cfg(test)] mod test { use super::*; - use crate::dataset::inmem::FastDataset; - use crate::parser::{gtrig, nq}; - use crate::quad::stream::QuadSource; + use crate::ns::xsd; + use crate::term::test::TestTerm; + use std::error::Error; + + type StaticTerm = TestTerm<&'static str>; #[test] fn simple() -> Result<(), Box> { - let d1 = r#" - @prefix rdf: . - @prefix dc: . - @prefix foaf: . - - { - dc:publisher "Bob" . - dc:publisher "Alice" . - } - - - { - _:a foaf:name "Bob" . - _:a foaf:mbox . - _:a foaf:knows _:b . 
- } + let foaf = "http://xmlns.com/foaf/0.1/"; + let foaf_knows = StaticTerm::iri2(foaf, "knows"); + let foaf_mbox = StaticTerm::iri2(foaf, "mbox"); + let foaf_name = StaticTerm::iri2(foaf, "name"); + let mbox_alice = StaticTerm::iri("mailto:alice@work.example"); + let lit_alice = StaticTerm::lit_dt("alice", xsd::string); + let lit_bob = StaticTerm::lit_dt("bob", xsd::string); + + let make_dataset = + |b1: &'static str, b2: &'static str| -> Vec<([StaticTerm; 3], Option)> { + let b1 = StaticTerm::bnode(b1); + let b2 = StaticTerm::bnode(b2); + vec![ + ([b1, foaf_name, lit_alice], None), + ([b1, foaf_mbox, mbox_alice], None), + ([b1, foaf_knows, b2], None), + ([b2, foaf_name, lit_bob], Some(b1)), + ] + }; + let d1 = make_dataset("alice", "bob"); + assert!(isomorphic_datasets(&d1, &d1)?); - - { - _:b foaf:name "Alice" . - _:b foaf:mbox . - } - "#; - let d2 = r#" - @prefix rdf: . - @prefix dc: . - @prefix foaf: . - - { - dc:publisher "Bob" . - dc:publisher "Alice" . - } + let d2 = make_dataset("a", "b"); + assert!(isomorphic_datasets(&d1, &d2)?); + assert!(isomorphic_datasets(&d2, &d1)?); - - { - _:a2 foaf:name "Bob" . - _:a2 foaf:mbox . - _:a2 foaf:knows _:b2 . - } + let d3 = make_dataset("b", "a"); + assert!(isomorphic_datasets(&d2, &d3)?); + assert!(isomorphic_datasets(&d1, &d3)?); - - { - _:b2 foaf:name "Alice" . - _:b2 foaf:mbox . - } - "#; - let d3 = r#" - @prefix rdf: . - @prefix dc: . - @prefix foaf: . - - { - dc:publisher "Bob" . - dc:publisher "Alice" . - } + let b1 = StaticTerm::bnode("alice"); + let d4 = vec![ + ([b1, foaf_name, lit_alice], None), + ([b1, foaf_mbox, mbox_alice], None), + ([b1, foaf_knows, StaticTerm::bnode("bob")], None), + ([StaticTerm::bnode("bobby"), foaf_name, lit_bob], Some(b1)), + ]; + assert!(!isomorphic_datasets(&d1, &d4)?); + assert!(!isomorphic_datasets(&d4, &d1)?); - - { - _:a3 foaf:name "Bob" . - _:a3 foaf:mbox . - _:a3 foaf:knows _:b3 . - } + Ok(()) + } - - { - _:c3 foaf:name "Alice" . - _:c3 foaf:mbox . 
- } - "#; - let d1: FastDataset = gtrig::parse_str(d1).collect_quads()?; - let d2: FastDataset = gtrig::parse_str(d2).collect_quads()?; - let d3: FastDataset = gtrig::parse_str(d3).collect_quads()?; + fn make_chain(ids: &'static str) -> Vec<[StaticTerm; 4]> { + let rel = StaticTerm::iri("tag:rel"); + let nodes: Vec<_> = (0..ids.len()) + .map(|i| StaticTerm::bnode(&ids[i..i + 1])) + .collect(); + let mut dataset = Vec::with_capacity(ids.len() - 1); + for i in 1..nodes.len() { + dataset.push([nodes[i - 1], rel, nodes[i], nodes[i - 1]]); + } + dataset + } + #[test] + fn chain() -> Result<(), Box> { + let d1 = make_chain("abcdefghij"); + assert!(isomorphic_datasets(&d1, &d1)?); + let d2 = make_chain("jihgfedcba"); assert!(isomorphic_datasets(&d1, &d2)?); assert!(isomorphic_datasets(&d2, &d1)?); + + let d3 = make_chain("abcdefghijk"); assert!(!isomorphic_datasets(&d1, &d3)?); - assert!(!isomorphic_datasets(&d2, &d3)?); + Ok(()) + } + + #[test] + fn cycle2() -> Result<(), Box> { + let d1 = make_chain("aba"); + assert!(isomorphic_datasets(&d1, &d1)?); + let d2 = make_chain("ABA"); + assert!(isomorphic_datasets(&d1, &d2)?); + assert!(isomorphic_datasets(&d2, &d1)?); + Ok(()) + } + #[test] + fn cycle_long() -> Result<(), Box> { + let d1 = make_chain("abcdefghia"); + assert!(isomorphic_datasets(&d1, &d1)?); + let d2 = make_chain("jihgfedcbj"); + assert!(isomorphic_datasets(&d1, &d2)?); + assert!(isomorphic_datasets(&d2, &d1)?); + + let d3 = make_chain("abcdefghija"); + assert!(!isomorphic_datasets(&d1, &d3)?); Ok(()) } #[test] - fn different_parsers() -> Result<(), Box> { - let trig = r#" - @prefix rdf: . - @prefix dc: . - @prefix foaf: . - - { - dc:publisher "Bob" . - dc:publisher "Alice" . - } + #[ignore] + fn cycle_pathological() -> Result<(), Box> { + // This case is tricky (and does not work with the current implementation). + // Both graphs contain the same number of (blank nodes) and the same number of arcs. 
+ // All blank nodes are locally undistinguishable from each other: + // - they have exactly 1 incoming arc and 1 outgoing arc, + // - both linking them to blank nodes that are themselves undistinguishable. + let mut d1 = make_chain("abca"); + let mut d1b = make_chain("defgd"); + d1.append(&mut d1b); + + let d2 = make_chain("abcdefga"); + assert!(!isomorphic_datasets(&d1, &d2)?); + Ok(()) + } - - { - _:a foaf:name "Bob" . - _:a foaf:mbox . - _:a foaf:knows _:b . - } + #[test] + fn cycle_almost_pathological() -> Result<(), Box> { + // This uses the same graphs as above (cycle_pathological), + // but *one* of the blank nodes is distinguished by an additional property, + // which breaks symmetry and allows the algorithm to give the correct answer. + // + // This illustrates why the pathological case is not too bad: + // in real data, *most* blank nodes will be distinguishable like that. + let typ = StaticTerm::iri("tag:type"); + let dist = StaticTerm::iri("tag:Distinguished"); + + let mut d1 = make_chain("abca"); + let mut d1b = make_chain("defgd"); + d1.append(&mut d1b); + d1.push([d1[0][0], typ, dist, d1[0][0]]); + + let mut d2 = make_chain("abcdefga"); + d2.push([d2[0][0], typ, dist, d2[0][0]]); + assert!(!isomorphic_datasets(&d1, &d2)?); + Ok(()) + } - - { - _:b foaf:name "Alice" . - _:b foaf:mbox . + fn make_clique(ids: &'static str) -> Vec<[StaticTerm; 4]> { + let rel = StaticTerm::iri("tag:rel"); + let nodes: Vec<_> = (0..ids.len()) + .map(|i| StaticTerm::bnode(&ids[i..i + 1])) + .collect(); + let mut dataset = Vec::with_capacity(ids.len() * ids.len()); + for n1 in nodes.iter() { + for n2 in nodes.iter() { + dataset.push([*n1, rel, *n2, *n1]); } - "#; - let nq = r#" - "Bob" . - "Alice" . - - _:a2 "Bob" . - _:a2 . - _:a2 _:b2 . + } + dataset + } - _:b2 "Alice" . - _:b2 . 
- "#; - let trig: FastDataset = gtrig::parse_str(trig).collect_quads()?; - let nq: FastDataset = nq::parse_str(nq).collect_quads()?; + #[test] + fn clique() -> Result<(), Box> { + let d1 = make_clique("abcde"); + assert!(isomorphic_datasets(&d1, &d1)?); - assert!(isomorphic_datasets(&nq, &trig)?); - assert!(isomorphic_datasets(&trig, &nq)?); + let d2 = make_clique("ABCDE"); + assert!(isomorphic_datasets(&d1, &d2)?); + assert!(isomorphic_datasets(&d2, &d1)?); + let d3 = make_clique("abcd"); + assert!(!isomorphic_datasets(&d1, &d3)?); Ok(()) } - #[test] - fn bn_names() -> Result<(), Box> { - let d1 = r#" - @prefix rdf: . - @prefix dc: . - @prefix foaf: . - - - { - _:bob dc:publisher "Bob" . - _:alice dc:publisher "Alice" . + fn make_tree(ids: &'static str) -> Vec<[StaticTerm; 4]> { + let rel = StaticTerm::iri("tag:rel"); + let nodes: Vec<_> = (0..ids.len()) + .map(|i| StaticTerm::bnode(&ids[i..i + 1])) + .collect(); + let mut dataset = Vec::with_capacity(ids.len() * ids.len()); + let mut i = 0; + while 2 * i < nodes.len() { + dataset.push([nodes[i], rel, nodes[2 * i], nodes[i]]); + if 2 * i + 1 < nodes.len() { + dataset.push([nodes[i], rel, nodes[2 * i + 1], nodes[i]]); } + i += 1; + } + dataset + } - _:bob - { - _:a foaf:name "Bob" . - _:a foaf:mbox . - _:a foaf:knows _:b . - } + #[test] + fn tree() -> Result<(), Box> { + let d1 = make_tree("abcdefghij"); + assert!(isomorphic_datasets(&d1, &d1)?); - _:alice - { - _:b foaf:name "Alice" . - _:b foaf:mbox . - } - "#; - let d2 = r#" - @prefix rdf: . - @prefix dc: . - @prefix foaf: . - - - { - _:bob2 dc:publisher "Bob" . - _:alice2 dc:publisher "Alice" . - } + let d2 = make_tree("ABCDEFGHIJ"); + assert!(isomorphic_datasets(&d1, &d2)?); + assert!(isomorphic_datasets(&d2, &d1)?); - _:bob2 - { - _:a2 foaf:name "Bob" . - _:a2 foaf:mbox . - _:a2 foaf:knows _:b2 . - } + let d3 = make_tree("abcdefghijk"); + assert!(!isomorphic_datasets(&d1, &d3)?); + Ok(()) + } - _:alice2 - { - _:b2 foaf:name "Alice" . - _:b2 foaf:mbox . 
- } - "#; - let d1: FastDataset = gtrig::parse_str(d1).collect_quads()?; - let d2: FastDataset = gtrig::parse_str(d2).collect_quads()?; + #[test] + fn predicate_and_gname() -> Result<(), Box> { + let rel = StaticTerm::iri("tag:rel"); + let b1 = StaticTerm::bnode("b1"); + let b2 = StaticTerm::bnode("b2"); + let b3 = StaticTerm::bnode("b3"); + let b4 = StaticTerm::bnode("b4"); + let d1 = vec![[b1, rel, b2, b3], [b2, rel, b3, b4], [rel, b1, b4, b3]]; + assert!(isomorphic_datasets(&d1, &d1)?); + + let d2 = vec![[b2, rel, b3, b4], [b3, rel, b4, b1], [rel, b2, b1, b4]]; assert!(isomorphic_datasets(&d1, &d2)?); assert!(isomorphic_datasets(&d2, &d1)?); + let d3 = vec![[b1, rel, b2, b3], [b2, rel, b3, b4], [rel, b2, b4, b3]]; + // ^^ + assert!(!isomorphic_datasets(&d2, &d3)?); + assert!(!isomorphic_datasets(&d1, &d3)?); + + let d4 = vec![[b1, rel, b2, b3], [b2, rel, b3, b4], [rel, b1, b4, b2]]; + // ^^ + assert!(!isomorphic_datasets(&d2, &d4)?); + assert!(!isomorphic_datasets(&d1, &d4)?); + Ok(()) } } -*/ diff --git a/api/src/graph/_isomorphism.rs b/api/src/graph/_isomorphism.rs index f73aee0e..ed0b6ce0 100644 --- a/api/src/graph/_isomorphism.rs +++ b/api/src/graph/_isomorphism.rs @@ -10,9 +10,8 @@ use crate::triple::stream::{ SinkError, SinkResult as _, SourceError, SourceResult as _, StreamError, StreamResult, }; use crate::triple::Triple; -use std::collections::{BTreeSet, HashMap}; +use std::collections::{HashMap, HashSet}; use std::error::Error; -use std::fmt; use std::hash::{Hash, Hasher}; /// Maximal steps a graph is traversed for proofing isomorphism. @@ -23,36 +22,67 @@ pub const MAX_DISTANCE: usize = 8; /// The hasher used internally for checking isomorphism. 
pub type IsoHasher = std::collections::hash_map::DefaultHasher; -#[derive(Debug, Clone, Copy)] -struct AlgorithmFailure; - -impl fmt::Display for AlgorithmFailure { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Failed to execute the Algorithm") - } -} - -impl Error for AlgorithmFailure {} - /// Checks if both graphs are isomorphic blank node equal. /// /// According to the [RDF specs](https://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/#graph-isomorphism) /// this means that a mapping for blank nodes in `g1` exists so that `g1 == g2`. /// -/// The used algorithm was originally implemented for [`Oxigraph`](https://github.com/Tpt/oxigraph) +/// The algorithm is inspired by a similar one in [`Oxigraph`](https://github.com/Tpt/oxigraph) /// and is extended for the generalized RDF model of `sophia`. /// /// # Errors /// /// Both graphs may fail traversing (and this is done several times). -/// Accordingly, a `StreamError` returned where `SourceError`s originate from -/// `g1` and `SinkError`s originate from `g2` +/// Accordingly, a `StreamError` is returned, +/// where `SourceError`s originate from `g1` +/// and `SinkError`s originate from `g2` /// /// # Performance /// -/// As this algorithm has to traverse each graph several times the algorithm -/// gets way more expensive with bigger numbers of triples. In the same way -/// the number of blank nodes contributes to the costs. +/// As this algorithm has to enumerate the triples of each graph several times, +/// the algorithm gets more expensive with bigger numbers of triples. +/// In the same way the number of blank nodes contributes to the cost. +/// +/// Note however that the algorithm uses some heuristics, +/// to avoid the combinatorial explosion of trying every possible bnode-pairing. +/// As a result, it is not 100% accurate (see below). +/// +/// # Accuracy +/// +/// If `g1` and `g2` are isomorphic, the function will always return `true`. 
+/// +/// If they are not isomorphic, the function will generally return `false`, +/// but a few pathological cases may be false positives +/// (*i.e.* recognized as isomorphic while they are not). +/// +/// For example, the graph: +/// +/// ```turtle +/// _:a :rel _:b. +/// _:b :rel _:a. +/// _:c :rel _:c. +/// ``` +/// +/// and the graph: +/// +/// ```turtle +/// _:a :rel _:b. +/// _:b :rel _:c. +/// _:c :rel _:a. +/// ``` +/// +/// are considered isomorphic by this algorithm, +/// because they have the same number of blank nodes and arcs, +/// and all of their blank nodes are locally indistinguishable +/// (same number of incoming and outgoing arcs, +/// linking them to undistinguishable blank nodes). +/// +/// Correctly answering in this kind of pathological case requires a combinatorial exploration +/// of all possible bnode-pairings, which would make the algorithm very slow in the worst case. +/// +/// The choice has been made to accept this flaw, +/// as such undistinguishable blank nodes are very rare in real data, +/// and not particularly useful. pub fn isomorphic_graphs(g1: &G1, g2: &G2) -> StreamResult where G1: Graph, @@ -87,100 +117,46 @@ where // ------------------------------------- // - regardless of blank nodes // - implicitly checks that g1 and g2 have the same length - let g1_in_g2 = check_for_equal_triples_regardless_bns(g1, g2)?; - let g2_in_g1 = check_for_equal_triples_regardless_bns(g2, g1).map_err(StreamError::reverse)?; - - if !(g1_in_g2 && g2_in_g1) { + if !check_for_equal_triples_regardless_bns(g1, g2)? { + return Ok(false); + } + if !check_for_equal_triples_regardless_bns(g2, g1).map_err(StreamError::reverse)? 
{ return Ok(false); } // Create hashes - let bn_hashes1 = match calc_bn_hashes::(g1) { + let bn_hashes1 = match calc_bn_hashes::(g1, bns1) { Ok(map) => map, - Err(SourceError(e)) => return Err(SourceError(e)), - Err(SinkError(_)) => return Ok(false), // Not the best solution + Err(e) => return Err(SourceError(e)), }; - let bn_hashes2 = match calc_bn_hashes::(g2) { + let bn_hashes2 = match calc_bn_hashes::(g2, bns2) { Ok(map) => map, - Err(SourceError(e)) => return Err(SinkError(e)), - Err(SinkError(_)) => return Ok(false), // Not the best solution + Err(e) => return Err(SinkError(e)), }; - // Create mapping - let mut bn_mapping = HashMap::new(); + // Check that, for each hash, there are the same number of bnodes in each graph. for (hash, bns1) in bn_hashes1 { - for bn1 in bns1 { - let bn2 = match bn_hashes2.get(&hash) { - Some(bn) => bn, - None => return Ok(false), // No matching blank node in g2! - }; - bn_mapping.insert(bn1, bn2); - } - } - - // Apply mapping - isomorphic_graphs_with_mapping(g1, g2, bn_mapping) -} - -/// Builds a `TermMatcher` by using the blank node mapping provided. -/// -/// If the given term is a blank node the matcher will match all possible -/// mappings for that blank node, i.e. included redundant blank nodes. If the -/// given term is not a blank node the matcher will only match the given term. -/// -/// This aligns with the description of bijection _M_ described in the -/// [RDF specs](https://www.w3.org/TR/2014/REC-rdf11-concepts-20140225/#graph-isomorphism). 
-pub(crate) fn bn_mapper<'m, T1, T2>( - mapping: &'m HashMap>, - t: &'m T1, -) -> Vec<&'m dyn TTerm> -where - T1: TTerm + Hash + Eq, - T2: TTerm + Hash + Eq, -{ - if t.kind() == TermKind::BlankNode { - match mapping.get(t) { - None => vec![], - Some(bns) => bns.iter().map(TTerm::as_dyn).collect(), + let bns1_len = bns1.len(); + let bns2_len = bn_hashes2.get(&hash).map(|x| x.len()).unwrap_or(0); + if bns1_len != bns2_len { + return Ok(false); // Not the same number of "equivalent" bnodes } - } else { - vec![t.as_dyn()] } -} - -/// Checks for each triple in `g1` with at least one blank node if it is also -/// contained in `g2` if the blank node `mapping` is applied. -fn isomorphic_graphs_with_mapping( - g1: &G1, - g2: &G2, - mapping: HashMap, &Vec>>, -) -> StreamResult -where - E1: 'static + Error, - E2: 'static + Error, - G1: Graph, - G2: Graph, - GTerm: Clone + Eq + Hash, - GTerm: Clone + Eq + Hash, -{ - for t in g1.triples() { - let t = t.source_err()?; - - if t.s().kind() == TermKind::BlankNode - || t.p().kind() == TermKind::BlankNode - || t.o().kind() == TermKind::BlankNode - { - let ms = bn_mapper(&mapping, t.s()); - let mp = bn_mapper(&mapping, t.p()); - let mo = bn_mapper(&mapping, t.o()); - - if g2.triples_matching(&ms, &mp, &mo).next().is_none() { - return Ok(false); - } - } - } - - Ok(true) + Ok(true) // heuristically + // At this point, we are *almost* certain the graphs are isomorphic + // (see section 'accuracy' in function documentation). + // To be 100% certain, + // we would need to try every possible 1-1 mapping of compatible bnodes + // (i.e. bnodes with the same hash), + // and test every arc against that mapping. 
+ /* + for bn_mapping in make_all_possible_mappings(bns1, bns2) { + if isomorphic_graphs_with_mapping(g1, g2, bn_mapping) { + return Ok(true); + } + } + Ok(false) + */ } pub(crate) fn match_ignore_bns(t: &T) -> AnyOrExactlyRef<&T> @@ -194,8 +170,8 @@ where } } -/// Checks is each triple in `g1` is also in `g2` regardless of blank node -/// labels. +/// Checks if each triple in `g1` is also in `g2` +/// regardless of blank node labels. /// /// # Example /// @@ -242,164 +218,166 @@ where Ok(true) } +#[allow(dead_code)] +fn dbg_map(map: &HashMap, T)>>) +where + G: Graph, + GTerm: Sized, +{ + for (hash, bns) in map { + print!("=== {:8x} ", hash); + for (bn, _) in bns { + print!("{} ", bn.value()); + } + println!(); + } + println!("=== ---"); +} + /// Calculate a hash for each blank node. /// -/// The hash of a blank node in a graph with `distance == 0` is the hash of all -/// terms in the triples in which the blank node occurs. Should this not be -/// enough to create distinct hashes, one can increase the distance. Increasing -/// distance means that beginning from the blank nodes triples the graph is -/// traversed up and down to add further triples to the hash calculation. -/// -/// Blank nodes are not included in calculating the hashes. -/// -/// The hashes are distinct if every 'bucket', i.e. the `Vec` in the returning -/// `HashMap` has only one element. +/// We first compute a hash based on all adjacent triples, ignoring bnodes. /// -/// An exception are redundant blank nodes. If the algorithm detects such nodes -/// they will share the same hash. +/// If several blank nodes have the same hash, +/// we modify their hash with the hash of their adjacent blank nodes. +/// We repeat this step until either +/// - we reached a point where each blank node has a unique hash, or +/// - the last step didn't change the number of distinct hashes. +/// At this point, if several blank nodes share the same hash, +/// they must be absolutely redundant. 
fn calc_bn_hashes( g: &G, -) -> StreamResult>>, G::Error, AlgorithmFailure> + bnodes: HashSet>, +) -> Result>>, G::Error> where G: Graph, GTerm: Clone + Eq + Hash, H: Hasher + Default, { - let mut res_map = HashMap::new(); - let mut unresolved_map = HashMap::new(); - - for bn in g.bnodes().source_err()?.into_iter() { - let (hash, upstream, downstream) = calc_bns_init_hash::(&bn, g).source_err()?; - unresolved_map - .entry(hash) - .or_insert_with(Vec::new) - .push((bn, upstream, downstream)); + let mut n2h = HashMap::new(); + let mut map = HashMap::new(); + + let n_bnodes = bnodes.len(); + for bn in bnodes { + let (hash, related) = calc_bns_init_hash::(&bn, g)?; + n2h.insert(bn.clone(), hash); + map.entry(hash).or_insert_with(Vec::new).push((bn, related)); } + //dbg_map::(&map); - let mut last_map = unresolved_map; - unresolved_map = HashMap::new(); + let mut len_old_map = 0; - let mut i = 0; + while map.len() < n_bnodes && map.len() != len_old_map { + len_old_map = map.len(); + let last_map = map; + map = HashMap::new(); + let last_n2h = n2h.clone(); - while !last_map.is_empty() && i < MAX_DISTANCE { - for (hash, bns) in last_map.into_iter() { + for (hash, bns) in last_map { if bns.len() == 1 { - // Distinct hash. - let (bn, _, _) = bns.into_iter().next().expect("len == 1"); - res_map.insert(hash, vec![bn]); - } else if bns - .iter() - .all(|(_, upstream, downstream)| upstream.is_empty() && downstream.is_empty()) - { - // Can no longer traverse graph to distinguish nodes, i.e. they must be redundant. - let redundants = bns.into_iter().map(|(bn, _, _)| bn).collect(); - res_map.insert(hash, redundants); + map.insert(hash, bns); } else { - // improve hash by further traversing. 
- for (bn, upstream, downstream) in bns { - let (better_hash, upstream, downstream) = - improve_hash_by_increasing_distance::( - hash, - &upstream, - &downstream, - g, - ) - .source_err()?; - unresolved_map - .entry(better_hash) + for (bn, related) in bns { + let mut hasher = H::default(); + hash.hash(&mut hasher); + + let mut modifiers = Vec::new(); + for (role, other) in related.iter() { + modifiers.push((role, last_n2h[other])); + } + modifiers.sort_unstable(); // to ensure reproducibility + for (role, hash) in modifiers { + role.hash(&mut hasher); + hash.hash(&mut hasher); + } + let new_hash = hasher.finish(); + *n2h.get_mut(&bn).unwrap() = new_hash; + map.entry(new_hash) .or_insert_with(Vec::new) - .push((bn, upstream, downstream)); + .push((bn, related)); } } } - - last_map = unresolved_map; - unresolved_map = HashMap::new(); - i += 1; + //dbg_map::(&map); } - - if i >= MAX_DISTANCE { - return Err(AlgorithmFailure).sink_err(); + let mut ret = HashMap::with_capacity(map.len()); + for (hash, bns) in map { + let v = bns.into_iter().map(|(bn, _)| bn).collect(); + ret.insert(hash, v); } - - Ok(res_map) + Ok(ret) } /// Calculate the blank node's initial hash in the graph, i.e. for distance 0. /// -/// Returns the initial hash, the upstream nodes and the downstream nodes. +/// Returns the initial hash, and a vec of related blank node +/// (associated with an opaque role identifier ) #[allow(clippy::type_complexity)] -fn calc_bns_init_hash( - bn: >erm, - g: &G, -) -> Result<(u64, Vec>, Vec>), G::Error> +fn calc_bns_init_hash(bn: >erm, g: &G) -> Result<(u64, Vec<(u8, GTerm)>), G::Error> where G: Graph, GTerm: Clone + Eq + Hash, H: Hasher + Default, { - // for same hashing result we need to order the triples' hashes. 
- let mut triple_hashes = BTreeSet::new(); - - let mut upstream = vec![]; - let mut downstream = vec![]; + let mut triple_hashes = Vec::new(); + let mut related = vec![]; - for tri in g.triples() { + for tri in g.triples_with_s(bn) { let tri = tri?; - if tri.s() == bn || tri.p() == bn || tri.o() == bn { - triple_hashes.insert(hash_triple_without_bn::>(&tri)); - if tri.o() != bn { - upstream.push(tri.o().clone()) - }; - if tri.s() != bn { - downstream.push(tri.s().clone()) - }; + triple_hashes.push(hash_triple_without_bn::>(&tri)); + let p = tri.p(); + if p.kind() == TermKind::BlankNode && p != bn { + related.push((0, p.clone())); + } + let o = tri.o(); + if o.kind() == TermKind::BlankNode && o != bn { + related.push((1, o.clone())); + } + } + for tri in g.triples_with_p(bn) { + let tri = tri?; + triple_hashes.push(hash_triple_without_bn::>(&tri)); + let s = tri.s(); + if s.kind() == TermKind::BlankNode && s != bn { + related.push((2, s.clone())); + } + let o = tri.o(); + if o.kind() == TermKind::BlankNode && o != bn { + related.push((3, o.clone())); + } + } + for tri in g.triples_with_o(bn) { + let tri = tri?; + triple_hashes.push(hash_triple_without_bn::>(&tri)); + let s = tri.s(); + if s.kind() == TermKind::BlankNode && s != bn { + related.push((4, s.clone())); + } + let p = tri.p(); + if p.kind() == TermKind::BlankNode && p != bn { + related.push((5, p.clone())); } } - // hashing - let mut hasher = H::default(); - triple_hashes.into_iter().for_each(|h| h.hash(&mut hasher)); - - Ok((hasher.finish(), upstream, downstream)) -} - -/// Improves an existing hash by further traversing the graph. -#[allow(clippy::type_complexity)] -fn improve_hash_by_increasing_distance( - hash: u64, - upstream: &[GTerm], - downstream: &[GTerm], - g: &G, -) -> Result<(u64, Vec>, Vec>), G::Error> -where - G: Graph, - GTerm: Clone + Eq + Hash, - H: Hasher + Default, -{ - // for same hashing result we need to order the triples' hashes. 
- let mut triple_hashes = BTreeSet::new(); - - let upstream = traverse_from_s_to_o::(upstream, g, &mut triple_hashes)?; - let downstream = traverse_from_o_to_s::(downstream, g, &mut triple_hashes)?; + triple_hashes.sort_unstable(); // to ensure reproducibility // hashing let mut hasher = H::default(); - // initialize with existing hash. - hash.hash(&mut hasher); triple_hashes.into_iter().for_each(|h| h.hash(&mut hasher)); - Ok((hasher.finish(), upstream, downstream)) + Ok((hasher.finish(), related)) } // utility -pub(crate) fn hash_if_not_bn(t: &T, h: &mut H) +pub(crate) fn hash_if_not_bn(t: &T, role: u8, h: &mut H) where T: TTerm + ?Sized, H: Hasher, { if t.kind() != TermKind::BlankNode { term_hash(t, h) + } else { + role.hash(h) } } @@ -409,262 +387,229 @@ where T: Triple, { let mut h = H::default(); - hash_if_not_bn(t.s(), &mut h); - hash_if_not_bn(t.p(), &mut h); - hash_if_not_bn(t.o(), &mut h); + hash_if_not_bn(t.s(), 0, &mut h); + hash_if_not_bn(t.p(), 1, &mut h); + hash_if_not_bn(t.o(), 2, &mut h); h.finish() } -/// Looks for triples where the given terms are objects. -/// Those triples' hashes are inserted into the list and a list of their -/// subjects is returned. -fn traverse_from_o_to_s( - upstream: &[GTerm], - g: &G, - hashes: &mut BTreeSet, -) -> Result>, G::Error> -where - G: Graph, - GTerm: Clone + Eq + Hash, - H: Hasher + Default, -{ - let mut subjects = vec![]; - for o in upstream { - for tri in g.triples_with_o(o) { - let tri = tri?; - hashes.insert(hash_triple_without_bn::>(&tri)); - subjects.push(tri.s().clone()); - } - } - Ok(subjects) -} - -/// Looks for triples where the given terms are subjects. -/// Those triples' hashes are inserted into the list and a list of their -/// objects is returned. 
-fn traverse_from_s_to_o( - downstream: &[GTerm], - g: &G, - hashes: &mut BTreeSet, -) -> Result>, G::Error> -where - G: Graph, - GTerm: Clone + Eq + Hash, - H: Hasher + Default, -{ - let mut objects = vec![]; - for s in downstream { - for tri in g.triples_with_s(s) { - let tri = tri?; - hashes.insert(hash_triple_without_bn::>(&tri)); - objects.push(tri.o().clone()); - } - } - Ok(objects) -} - -/* #[cfg(test)] mod test { use super::*; - use crate::graph::inmem::FastGraph; - //use crate::parser::{nt, turtle}; - use crate::triple::stream::TripleSource; + use crate::ns::xsd; + use crate::term::test::TestTerm; + + type StaticTerm = TestTerm<&'static str>; #[test] fn simple() -> Result<(), Box> { - let g1 = r#" - @prefix foaf: . - - _:alice foaf:name "Alice"; - foaf:mbox ; - foaf:knows _:bob . - _:bob foaf:name "Bob". - "#; - let g2 = r#" - @prefix foaf: . - - _:a foaf:name "Alice"; - foaf:mbox ; - foaf:knows _:b . - _:b foaf:name "Bob". - "#; - let g3 = r#" - @prefix foaf: . - - _:a foaf:name "Alice"; - foaf:mbox ; - foaf:knows _:b . - _:c foaf:name "Bob". 
- "#; - let g1: FastGraph = turtle::parse_str(g1).collect_triples()?; - let g2: FastGraph = turtle::parse_str(g2).collect_triples()?; - let g3: FastGraph = turtle::parse_str(g3).collect_triples()?; - + let foaf = "http://xmlns.com/foaf/0.1/"; + let foaf_knows = StaticTerm::iri2(foaf, "knows"); + let foaf_mbox = StaticTerm::iri2(foaf, "mbox"); + let foaf_name = StaticTerm::iri2(foaf, "name"); + let mbox_alice = StaticTerm::iri("mailto:alice@work.example"); + let lit_alice = StaticTerm::lit_dt("alice", xsd::string); + let lit_bob = StaticTerm::lit_dt("bob", xsd::string); + + let make_graph = |b1: &'static str, b2: &'static str| -> Vec<[StaticTerm; 3]> { + let b1 = StaticTerm::bnode(b1); + let b2 = StaticTerm::bnode(b2); + vec![ + [b1, foaf_name, lit_alice], + [b1, foaf_mbox, mbox_alice], + [b1, foaf_knows, b2], + [b2, foaf_name, lit_bob], + ] + }; + let g1 = make_graph("alice", "bob"); + assert!(isomorphic_graphs(&g1, &g1)?); + + let g2 = make_graph("a", "b"); assert!(isomorphic_graphs(&g1, &g2)?); assert!(isomorphic_graphs(&g2, &g1)?); - assert!(!isomorphic_graphs(&g1, &g3)?); - assert!(!isomorphic_graphs(&g2, &g3)?); + + let g3 = make_graph("b", "a"); + assert!(isomorphic_graphs(&g2, &g3)?); + assert!(isomorphic_graphs(&g1, &g3)?); + + let b1 = StaticTerm::bnode("alice"); + let g4 = vec![ + [b1, foaf_name, lit_alice], + [b1, foaf_mbox, mbox_alice], + [b1, foaf_knows, StaticTerm::bnode("bob")], + [StaticTerm::bnode("bobby"), foaf_name, lit_bob], + ]; + assert!(!isomorphic_graphs(&g1, &g4)?); + assert!(!isomorphic_graphs(&g4, &g1)?); Ok(()) } + fn make_chain(ids: &'static str) -> Vec<[StaticTerm; 3]> { + let rel = StaticTerm::iri("tag:rel"); + let nodes: Vec<_> = (0..ids.len()) + .map(|i| StaticTerm::bnode(&ids[i..i + 1])) + .collect(); + let mut graph = Vec::with_capacity(ids.len() - 1); + for i in 1..nodes.len() { + graph.push([nodes[i - 1], rel, nodes[i]]); + } + graph + } + #[test] - fn different_parsers() -> Result<(), Box> { - let ttl = r#" - @prefix foaf: . 
- - [] foaf:name "Alice"; - foaf:mbox ; - foaf:knows [foaf:name "Bob"] . - "#; - let nt = r#" - _:alice "Alice". - _:alice . - _:alice _:bob. - _:bob "Bob". - "#; - let ttl: FastGraph = turtle::parse_str(ttl).collect_triples()?; - let nt: FastGraph = nt::parse_str(nt).collect_triples()?; - - assert!(isomorphic_graphs(&nt, &ttl)?); - assert!(isomorphic_graphs(&ttl, &nt)?); + fn chain() -> Result<(), Box> { + let g1 = make_chain("abcdefghij"); + assert!(isomorphic_graphs(&g1, &g1)?); + let g2 = make_chain("jihgfedcba"); + assert!(isomorphic_graphs(&g1, &g2)?); + assert!(isomorphic_graphs(&g2, &g1)?); + let g3 = make_chain("abcdefghijk"); + assert!(!isomorphic_graphs(&g1, &g3)?); Ok(()) } - /// Every subject and object is a blank node with the a different predicate. #[test] - fn heterogeneous_grid() -> Result<(), Box> { - let g1 = r#" - @prefix : . - - _:a :p1 _:b, _:d . - _:c :p2 _:b, _:f . - _:e :p3 _:b, _:d, _:f, _:h . - _:g :p4 _:d, _:h . - _:i :p5 _:f, _:h . - "#; - let g2 = r#" - @prefix : . - - _:a2 :p1 _:b2, _:d2 . - _:c2 :p2 _:b2, _:f2 . - _:e2 :p3 _:b2, _:d2, _:f2, _:h2 . - _:g2 :p4 _:d2, _:h2 . - _:i2 :p5 _:f2, _:h2 . - "#; - let g1: FastGraph = turtle::parse_str(g1).collect_triples()?; - let g2: FastGraph = turtle::parse_str(g2).collect_triples()?; + fn cycle2() -> Result<(), Box> { + let g1 = make_chain("aba"); + assert!(isomorphic_graphs(&g1, &g1)?); + let g2 = make_chain("ABA"); + assert!(isomorphic_graphs(&g1, &g2)?); + assert!(isomorphic_graphs(&g2, &g1)?); + Ok(()) + } + #[test] + fn cycle_long() -> Result<(), Box> { + let g1 = make_chain("abcdefghia"); + assert!(isomorphic_graphs(&g1, &g1)?); + let g2 = make_chain("jihgfedcbj"); assert!(isomorphic_graphs(&g1, &g2)?); assert!(isomorphic_graphs(&g2, &g1)?); + let g3 = make_chain("abcdefghija"); + assert!(!isomorphic_graphs(&g1, &g3)?); + Ok(()) + } + + #[test] + #[ignore] + fn cycle_pathological() -> Result<(), Box> { + // This case is tricky (and does not work with the current implementation). 
+ // Both graphs contain the same number of blank nodes and the same number of arcs. + // All blank nodes are locally undistinguishable from each other: + // - they have exactly 1 incoming arc and 1 outgoing arc, + // - both linking them to blank nodes that are themselves undistinguishable. + let mut g1 = make_chain("abca"); + let mut g1b = make_chain("defgd"); + g1.append(&mut g1b); + + let g2 = make_chain("abcdefga"); + assert!(!isomorphic_graphs(&g1, &g2)?); Ok(()) } - /// Every subject and object is a blank node with the same predicate. - /// Source of test: http://aidanhogan.com/docs/rdf-canonicalisation.pdf #[test] - fn homogeneous_grid() -> Result<(), Box> { - let g1 = r#" - @prefix : . - - _:a :p _:b, _:d . - _:c :p _:b, _:f . - _:e :p _:b, _:d, _:f, _:h . - _:g :p _:d, _:h . - _:i :p _:f, _:h . - "#; - let g2 = r#" - @prefix : . - - _:a2 :p _:b2, _:d2 . - _:c2 :p _:b2, _:f2 . - _:e2 :p _:b2, _:d2, _:f2, _:h2 . - _:g2 :p _:d2, _:h2 . - _:i2 :p _:f2, _:h2 . - "#; - let g1: FastGraph = turtle::parse_str(g1).collect_triples()?; - let g2: FastGraph = turtle::parse_str(g2).collect_triples()?; + fn cycle_almost_pathological() -> Result<(), Box> { + // This uses the same graphs as above (cycle_pathological), + // but *one* of the blank nodes is distinguished by an additional property, + // which breaks symmetry and allows the algorithm to give the correct answer. + // + // This illustrates why the pathological case is not too bad: + // in real data, *most* blank nodes will be distinguishable like that. 
+ let typ = StaticTerm::iri("tag:type"); + let dist = StaticTerm::iri("tag:Distinguished"); + + let mut g1 = make_chain("abca"); + let mut g1b = make_chain("defgd"); + g1.append(&mut g1b); + g1.push([g1[0][0], typ, dist]); + + let mut g2 = make_chain("abcdefga"); + g2.push([g2[0][0], typ, dist]); + assert!(!isomorphic_graphs(&g1, &g2)?); + Ok(()) + } + fn make_clique(ids: &'static str) -> Vec<[StaticTerm; 3]> { + let rel = StaticTerm::iri("tag:rel"); + let nodes: Vec<_> = (0..ids.len()) + .map(|i| StaticTerm::bnode(&ids[i..i + 1])) + .collect(); + let mut graph = Vec::with_capacity(ids.len() * ids.len()); + for n1 in nodes.iter() { + for n2 in nodes.iter() { + graph.push([*n1, rel, *n2]); + } + } + graph + } + + #[test] + fn clique() -> Result<(), Box> { + let g1 = make_clique("abcde"); + assert!(isomorphic_graphs(&g1, &g1)?); + + let g2 = make_clique("ABCDE"); assert!(isomorphic_graphs(&g1, &g2)?); assert!(isomorphic_graphs(&g2, &g1)?); + let g3 = make_clique("abcd"); + assert!(!isomorphic_graphs(&g1, &g3)?); Ok(()) } - /// Like homogeneous grid but with redundant nodes removed in the second graph. + fn make_tree(ids: &'static str) -> Vec<[StaticTerm; 3]> { + let rel = StaticTerm::iri("tag:rel"); + let nodes: Vec<_> = (0..ids.len()) + .map(|i| StaticTerm::bnode(&ids[i..i + 1])) + .collect(); + let mut graph = Vec::with_capacity(ids.len() * ids.len()); + let mut i = 0; + while 2 * i < nodes.len() { + graph.push([nodes[i], rel, nodes[2 * i]]); + if 2 * i + 1 < nodes.len() { + graph.push([nodes[i], rel, nodes[2 * i + 1]]); + } + i += 1; + } + graph + } + #[test] - fn truncated_grid() -> Result<(), Box> { - let g1 = r#" - @prefix : . - - _:a :p _:b, _:d . - _:c :p _:b, _:f . - _:e :p _:b, _:d, _:f, _:h . - _:g :p _:d, _:h . - _:i :p _:f, _:h . - "#; - let g2 = r#" - @prefix : . - - _:a2 :p _:b2 . 
- "#; - let g1: FastGraph = turtle::parse_str(g1).collect_triples()?; - let g2: FastGraph = turtle::parse_str(g2).collect_triples()?; + fn tree() -> Result<(), Box> { + let g1 = make_tree("abcdefghij"); + assert!(isomorphic_graphs(&g1, &g1)?); - assert!(!isomorphic_graphs(&g1, &g2)?); - assert!(!isomorphic_graphs(&g2, &g1)?); + let g2 = make_tree("ABCDEFGHIJ"); + assert!(isomorphic_graphs(&g1, &g2)?); + assert!(isomorphic_graphs(&g2, &g1)?); + let g3 = make_tree("abcdefghijk"); + assert!(!isomorphic_graphs(&g1, &g3)?); Ok(()) } - /// Source of test: http://aidanhogan.com/docs/rdf-canonicalisation.pdf + #[test] - fn spider_like() -> Result<(), Box> { - let g1 = r#" - @prefix : . - - :Chile :cabinet _:b1, [ - :members 23 - ], [ - :members 23 - ], _:b4 ; - :presidency _:a1, _:a2, _:a3, _:a4 . - - _:a1 :next _:a2 . - _:a2 :next _:a3 ; - :president :MBachelet . - _:a3 :next _:a4. - _:a4 :president :MBachelet . - - :MBachelet :spouse _:c . - "#; - let g2 = r#" - @prefix : . - - :Chile :cabinet _:b12, [ - :members 23 - ], [ - :members 23 - ], _:b42 ; - :presidency _:a12, _:a22, _:a32, _:a42 . - - _:a12 :next _:a22 . - _:a22 :next _:a32 ; - :president :MBachelet . - _:a32 :next _:a42. - _:a42 :president :MBachelet . - - :MBachelet :spouse _:c2 . 
- "#; - let g1: FastGraph = turtle::parse_str(g1).collect_triples()?; - let g2: FastGraph = turtle::parse_str(g2).collect_triples()?; + fn predicate() -> Result<(), Box> { + let rel = StaticTerm::iri("tag:rel"); + let b1 = StaticTerm::bnode("b1"); + let b2 = StaticTerm::bnode("b2"); + let b3 = StaticTerm::bnode("b3"); + let b4 = StaticTerm::bnode("b4"); + let g1 = vec![[b1, rel, b2], [b2, rel, b3], [rel, b1, b4]]; + assert!(isomorphic_graphs(&g1, &g1)?); + + let g2 = vec![[b2, rel, b3], [b3, rel, b4], [rel, b2, b1]]; assert!(isomorphic_graphs(&g1, &g2)?); assert!(isomorphic_graphs(&g2, &g1)?); + let g3 = vec![[b1, rel, b2], [b2, rel, b3], [rel, b2, b4]]; + assert!(!isomorphic_graphs(&g2, &g3)?); + assert!(!isomorphic_graphs(&g1, &g3)?); + Ok(()) } } -*/ From b1e8e9fe71bf9a0a9148b93ef094a3575283b37d Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Thu, 16 Jul 2020 08:29:38 +0200 Subject: [PATCH 09/11] moved core parts of the 'parser' module to 'sophia_api' --- api/src/lib.rs | 2 +- {sophia => api}/src/parser.rs | 13 ++--------- {sophia => api}/src/parser/_location.rs | 30 ------------------------- sophia/src/lib.rs | 12 +++++++++- sophia/src/parser/gtrig.rs | 10 ++++----- sophia/src/parser/nq.rs | 10 ++++----- sophia/src/parser/nt.rs | 10 ++++----- sophia/src/parser/trig.rs | 10 ++++----- sophia/src/parser/turtle.rs | 16 ++++++++----- sophia/src/parser/xml.rs | 6 ++--- sophia/src/parser/xml/_error.rs | 29 +++++++++++++++++++++++- sophia/src/parser/xml/_handler.rs | 1 - 12 files changed, 71 insertions(+), 78 deletions(-) rename {sophia => api}/src/parser.rs (93%) rename {sophia => api}/src/parser/_location.rs (72%) diff --git a/api/src/lib.rs b/api/src/lib.rs index 34fc650d..4a7c8964 100644 --- a/api/src/lib.rs +++ b/api/src/lib.rs @@ -6,7 +6,7 @@ pub mod dataset; pub mod graph; pub mod ns; -//pub mod parser; +pub mod parser; pub mod quad; //pub mod serializer; pub mod term; diff --git a/sophia/src/parser.rs b/api/src/parser.rs similarity index 93% rename 
from sophia/src/parser.rs rename to api/src/parser.rs index 07f4e849..f40691b0 100644 --- a/sophia/src/parser.rs +++ b/api/src/parser.rs @@ -1,7 +1,7 @@ //! API for parsing RDF syntaxes. -use sophia_api::quad::stream::QuadSource; -use sophia_api::triple::stream::TripleSource; +use crate::quad::stream::QuadSource; +use crate::triple::stream::TripleSource; mod _location; pub use _location::*; @@ -94,12 +94,3 @@ macro_rules! def_mod_functions_for_bufread_parser { } }; } - -pub mod gtrig; -pub mod nq; -pub mod nt; -pub mod rio_common; -pub mod trig; -pub mod turtle; -#[cfg(feature = "xml")] -pub mod xml; diff --git a/sophia/src/parser/_location.rs b/api/src/parser/_location.rs similarity index 72% rename from sophia/src/parser/_location.rs rename to api/src/parser/_location.rs index 7012af6f..c45a1bad 100644 --- a/sophia/src/parser/_location.rs +++ b/api/src/parser/_location.rs @@ -1,5 +1,4 @@ // this module is transparently re-exported by its parent `parser` -use std::error::Error; use std::fmt; /// A location in a parsed stream, which can be unknown, a specific point, or a span. @@ -68,32 +67,3 @@ impl fmt::Display for Position { pub trait WithLocation { fn location(&self) -> Location; } - -/// An extension for `Result`s embedding a [`LocatableError`](trait.LocatableError.html). -pub trait LocatableResult -where - E: LocatableError, -{ - /// Add location information to the embeded error, if any. - fn locate_err_with(self, ls: L) -> Result; -} - -impl LocatableResult for Result -where - E: LocatableError, -{ - fn locate_err_with(self, ls: L) -> Result { - self.map_err(|e| e.locate_with(ls)) - } -} - -/// An error which can be enriched with location information. -/// -/// See also [`WithLocation`](trait.WithLocation.html) -/// and [`LocatableResult`](./trait.LocatableResult.html). -pub trait LocatableError: Error { - type WithLocation: WithLocation + Error; - - /// Add location information to this error. 
- fn locate_with(self, ls: L) -> Self::WithLocation; -} diff --git a/sophia/src/lib.rs b/sophia/src/lib.rs index 5422320b..6d806596 100644 --- a/sophia/src/lib.rs +++ b/sophia/src/lib.rs @@ -63,7 +63,6 @@ //! # Ok::<(), Box>(()) //! ``` -pub mod parser; pub mod query; pub mod serializer; @@ -80,6 +79,17 @@ pub mod graph { pub mod ns { pub use sophia_api::ns::*; } +pub mod parser { + pub use sophia_api::parser::*; + pub mod gtrig; + pub mod nq; + pub mod nt; + pub mod rio_common; + pub mod trig; + pub mod turtle; + #[cfg(feature = "xml")] + pub mod xml; +} pub mod quad { pub use sophia_api::quad::*; } diff --git a/sophia/src/parser/gtrig.rs b/sophia/src/parser/gtrig.rs index 4690ee18..f082917f 100644 --- a/sophia/src/parser/gtrig.rs +++ b/sophia/src/parser/gtrig.rs @@ -1,11 +1,9 @@ //! Adapter for the Generalized TriG parser from [RIO](https://github.com/Tpt/rio/blob/master/turtle/src/gtrig.rs) -use std::io::BufRead; - -use rio_turtle::{GTriGParser as RioGTriGParser, TurtleError}; - use crate::parser::rio_common::*; -use crate::parser::QuadParser; +use rio_turtle::{GTriGParser as RioGTriGParser, TurtleError}; +use sophia_api::parser::QuadParser; +use std::io::BufRead; /// TriG parser based on RIO. #[derive(Clone, Debug, Default)] @@ -24,7 +22,7 @@ impl QuadParser for GTriGParser { } } -def_mod_functions_for_bufread_parser!(GTriGParser, QuadParser); +sophia_api::def_mod_functions_for_bufread_parser!(GTriGParser, QuadParser); // --------------------------------------------------------------------------------- // tests diff --git a/sophia/src/parser/nq.rs b/sophia/src/parser/nq.rs index 717f7a58..40df2766 100644 --- a/sophia/src/parser/nq.rs +++ b/sophia/src/parser/nq.rs @@ -2,12 +2,10 @@ //! //! 
[N-Quads]: https://www.w3.org/TR/n-quads/ -use std::io::BufRead; - -use rio_turtle::{NQuadsParser as RioNQParser, TurtleError}; - use crate::parser::rio_common::*; -use crate::parser::QuadParser; +use rio_turtle::{NQuadsParser as RioNQParser, TurtleError}; +use sophia_api::parser::QuadParser; +use std::io::BufRead; /// N-Quads parser based on RIO. #[derive(Clone, Debug, Default)] @@ -20,7 +18,7 @@ impl QuadParser for NQuadsParser { } } -def_mod_functions_for_bufread_parser!(NQuadsParser, QuadParser); +sophia_api::def_mod_functions_for_bufread_parser!(NQuadsParser, QuadParser); // --------------------------------------------------------------------------------- // tests diff --git a/sophia/src/parser/nt.rs b/sophia/src/parser/nt.rs index f328932d..ffe9a75a 100644 --- a/sophia/src/parser/nt.rs +++ b/sophia/src/parser/nt.rs @@ -2,12 +2,10 @@ //! //! [N-Triples]: https://www.w3.org/TR/n-triples/ -use std::io::BufRead; - -use rio_turtle::{NTriplesParser as RioNTParser, TurtleError}; - use crate::parser::rio_common::*; -use crate::parser::TripleParser; +use rio_turtle::{NTriplesParser as RioNTParser, TurtleError}; +use sophia_api::parser::TripleParser; +use std::io::BufRead; /// N-Triples parser based on RIO. #[derive(Clone, Debug, Default)] @@ -20,7 +18,7 @@ impl TripleParser for NTriplesParser { } } -def_mod_functions_for_bufread_parser!(NTriplesParser, TripleParser); +sophia_api::def_mod_functions_for_bufread_parser!(NTriplesParser, TripleParser); // --------------------------------------------------------------------------------- // tests diff --git a/sophia/src/parser/trig.rs b/sophia/src/parser/trig.rs index ec685d7c..028e31bf 100644 --- a/sophia/src/parser/trig.rs +++ b/sophia/src/parser/trig.rs @@ -1,11 +1,9 @@ //! 
Adapter for the TriG parser from [RIO](https://github.com/Tpt/rio/blob/master/turtle/src/turtle.rs) -use std::io::BufRead; - -use rio_turtle::{TriGParser as RioTriGParser, TurtleError}; - use crate::parser::rio_common::*; -use crate::parser::QuadParser; +use rio_turtle::{TriGParser as RioTriGParser, TurtleError}; +use sophia_api::parser::QuadParser; +use std::io::BufRead; /// TriG parser based on RIO. #[derive(Clone, Debug, Default)] @@ -24,7 +22,7 @@ impl QuadParser for TriGParser { } } -def_mod_functions_for_bufread_parser!(TriGParser, QuadParser); +sophia_api::def_mod_functions_for_bufread_parser!(TriGParser, QuadParser); // --------------------------------------------------------------------------------- // tests diff --git a/sophia/src/parser/turtle.rs b/sophia/src/parser/turtle.rs index 8524ff3a..8e49c548 100644 --- a/sophia/src/parser/turtle.rs +++ b/sophia/src/parser/turtle.rs @@ -2,11 +2,11 @@ use std::io::BufRead; +use crate::parser::rio_common::*; use rio_api::parser::ParseError; use rio_turtle::{TurtleError, TurtleParser as RioTurtleParser}; - -use crate::parser::rio_common::*; -use crate::parser::{Location, TripleParser, WithLocation}; +use sophia_api::parser::{Location, TripleParser, WithLocation}; +use thiserror::Error; /// Turtle parser based on RIO. 
#[derive(Clone, Debug, Default)] @@ -25,16 +25,20 @@ impl TripleParser for TurtleParser { } } -impl WithLocation for TurtleError { +#[derive(Debug, Error)] +#[error("{0}")] +pub struct SophiaTurtleError(pub TurtleError); + +impl WithLocation for SophiaTurtleError { fn location(&self) -> Location { - match self.textual_position() { + match self.0.textual_position() { None => Location::Unknown, Some(pos) => Location::from_lico(pos.line_number() + 1, pos.byte_number() + 1), } } } -def_mod_functions_for_bufread_parser!(TurtleParser, TripleParser); +sophia_api::def_mod_functions_for_bufread_parser!(TurtleParser, TripleParser); // --------------------------------------------------------------------------------- // tests diff --git a/sophia/src/parser/xml.rs b/sophia/src/parser/xml.rs index c9f32343..60eed730 100644 --- a/sophia/src/parser/xml.rs +++ b/sophia/src/parser/xml.rs @@ -21,7 +21,7 @@ use url::Url; use crate::ns::rdf; use crate::ns::xsd; use crate::ns::Namespace; -use crate::parser::{LocatableError, TripleParser}; +use sophia_api::parser::TripleParser; use sophia_api::term::matcher::TermMatcher; use sophia_api::term::SimpleIri; use sophia_iri::is_absolute_iri_ref; @@ -117,7 +117,7 @@ impl TripleParser for RdfXmlParser { } } -def_mod_functions_for_bufread_parser!(RdfXmlParser, TripleParser); +sophia_api::def_mod_functions_for_bufread_parser!(RdfXmlParser, TripleParser); // --- @@ -271,9 +271,9 @@ mod test { use crate::graph::inmem::HashGraph; use crate::graph::inmem::TermIndexMapU; use crate::graph::Graph; - use crate::parser::TripleParser; use crate::triple::stream::TripleSource; use crate::triple::Triple; + use sophia_api::parser::TripleParser; use sophia_api::term::{CopyTerm, TTerm}; use sophia_term::factory::RcTermFactory; use sophia_term::{BoxTerm, Term}; diff --git a/sophia/src/parser/xml/_error.rs b/sophia/src/parser/xml/_error.rs index 400c3287..c64f7b45 100644 --- a/sophia/src/parser/xml/_error.rs +++ b/sophia/src/parser/xml/_error.rs @@ -1,5 +1,6 @@ 
-use crate::parser::{LocatableError, Location, Position, WithLocation}; use quick_xml::Reader; +use sophia_api::parser::{Location, Position, WithLocation}; +use std::error::Error; use std::io::BufRead; pub type Result = std::result::Result; @@ -35,6 +36,14 @@ impl WithLocation for XmlParserError { } } +/// An error which can be enriched with location information. +pub trait LocatableError: Error { + type WithLocation: WithLocation + Error; + + /// Add location information to this error. + fn locate_with(self, ls: L) -> Self::WithLocation; +} + impl<'a, BR> LocatableError<&'a Reader
> for quick_xml::Error where BR: BufRead, @@ -50,6 +59,24 @@ where } } +/// An extension for `Result`s embedding a [`LocatableError`](trait.LocatableError.html). +pub trait LocatableResult +where + E: LocatableError, +{ + /// Add location information to the embeded error, if any. + fn locate_err_with(self, ls: L) -> Result; +} + +impl LocatableResult for Result +where + E: LocatableError, +{ + fn locate_err_with(self, ls: L) -> Result { + self.map_err(|e| e.locate_with(ls)) + } +} + /// Errors that violate the /// [RDF/XML specification](https://www.w3.org/TR/rdf-syntax-grammar/). /// diff --git a/sophia/src/parser/xml/_handler.rs b/sophia/src/parser/xml/_handler.rs index 6359da40..74fdbb59 100644 --- a/sophia/src/parser/xml/_handler.rs +++ b/sophia/src/parser/xml/_handler.rs @@ -1,5 +1,4 @@ use super::*; -use crate::parser::LocatableResult; use sophia_api::term::TTerm; /// The state of the parser. From fb2e01a99038b6ff058fceaf1e5a8f265ee3038a Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Thu, 16 Jul 2020 09:17:49 +0200 Subject: [PATCH 10/11] moved core parts of the 'serializer' module to 'sophia_api' --- api/src/lib.rs | 2 +- {sophia => api}/src/serializer.rs | 5 +- jsonld/src/serializer.rs | 2 +- jsonld/src/test_util.rs | 2 +- sophia/src/lib.rs | 6 +- sophia/src/serializer/common.rs | 123 ------------------------------ sophia/src/serializer/nq.rs | 8 +- sophia/src/serializer/nt.rs | 5 +- 8 files changed, 15 insertions(+), 138 deletions(-) rename {sophia => api}/src/serializer.rs (98%) delete mode 100644 sophia/src/serializer/common.rs diff --git a/api/src/lib.rs b/api/src/lib.rs index 4a7c8964..78216806 100644 --- a/api/src/lib.rs +++ b/api/src/lib.rs @@ -8,6 +8,6 @@ pub mod graph; pub mod ns; pub mod parser; pub mod quad; -//pub mod serializer; +pub mod serializer; pub mod term; pub mod triple; diff --git a/sophia/src/serializer.rs b/api/src/serializer.rs similarity index 98% rename from sophia/src/serializer.rs rename to api/src/serializer.rs 
index 96afe778..eb28d348 100644 --- a/sophia/src/serializer.rs +++ b/api/src/serializer.rs @@ -14,10 +14,7 @@ use crate::dataset::*; use crate::graph::*; use crate::quad::stream::*; -use crate::triple::{stream::*, *}; - -pub mod nq; -pub mod nt; +use crate::triple::stream::*; /// A triple serializer writes triples according to a given format. pub trait TripleSerializer { diff --git a/jsonld/src/serializer.rs b/jsonld/src/serializer.rs index c7e077c4..8707709b 100644 --- a/jsonld/src/serializer.rs +++ b/jsonld/src/serializer.rs @@ -2,8 +2,8 @@ use crate::config::*; use crate::error::*; use json::JsonValue; use sophia::quad::stream::*; -use sophia::serializer::*; use sophia::triple::stream::{SinkError, StreamResult}; +use sophia_api::serializer::*; mod engine; mod rdf_object; diff --git a/jsonld/src/test_util.rs b/jsonld/src/test_util.rs index 5901ce5a..cbb6c5ac 100644 --- a/jsonld/src/test_util.rs +++ b/jsonld/src/test_util.rs @@ -5,8 +5,8 @@ use crate::error::*; use crate::serializer::Jsonifier; use json::JsonValue; use sophia::dataset::MutableDataset; -use sophia::serializer::QuadSerializer; use sophia::triple::stream::SinkError; +use sophia_api::serializer::QuadSerializer; use sophia_iri::resolve::Resolve; use sophia_term::iri::Iri; use sophia_term::BoxTerm; diff --git a/sophia/src/lib.rs b/sophia/src/lib.rs index 6d806596..b05392a8 100644 --- a/sophia/src/lib.rs +++ b/sophia/src/lib.rs @@ -64,7 +64,6 @@ //! ``` pub mod query; -pub mod serializer; pub mod dataset { pub use sophia_api::dataset::*; @@ -93,6 +92,11 @@ pub mod parser { pub mod quad { pub use sophia_api::quad::*; } +pub mod serializer { + pub use sophia_api::serializer::*; + pub mod nq; + pub mod nt; +} pub mod term { pub use sophia_term::*; } diff --git a/sophia/src/serializer/common.rs b/sophia/src/serializer/common.rs deleted file mode 100644 index c1546830..00000000 --- a/sophia/src/serializer/common.rs +++ /dev/null @@ -1,123 +0,0 @@ -//! 
Reusable types, functions and macros for implementing serializers. - -/// This macro provides a straightforward implementation of the default functions -/// of a serializer module. -#[macro_export] -macro_rules! def_default_serializer_api { - ($writer: ident, $stringifier: ident) => { - /// Shortcut for `Config::default().writer(write)` - #[inline] - pub fn writer(write: W) -> $writer { - Config::default().writer(write) - } - - /// Shortcut for `Config::default().stringifier()` - #[inline] - pub fn stringifier() -> $stringifier { - Config::default().stringifier() - } - }; - () => { - def_default_serializer_api!(Writer, Stringifier); - }; -} - -/// This macro provides a straightforward implementation of the `Stringifier` type, -/// based on the `Writer` type, for triples. -/// -/// See also -/// [`def_quad_stringifier`](macro.def_quad_stringifier.html) -#[macro_export] -macro_rules! def_triple_stringifier { - ($writer: ident, $stringifier: ident) => { - /// A `TripleSink` returned by `Config::stringifier` - pub struct $stringifier { - writer: $writer>, - } - - impl $crate::serializer::TripleStringifier for $stringifier { - type Config = Config; - - fn new(config: Config) -> $stringifier { - $stringifier { - writer: $writer::new(Vec::new(), config), - } - } - } - - impl $crate::triple::stream::TripleSink for $stringifier { - type Outcome = String; - /// never returns an error - type Error = std::convert::Infallible; - - fn feed(&mut self, t: &T) -> std::result::Result<(), Self::Error> - where - T: $crate::triple::Triple, - { - self.writer.feed(t).map_err(|_| unreachable!()) - } - - fn finish(&mut self) -> std::result::Result { - let mut v = Vec::new(); - swap(&mut self.writer.write, &mut v); - Ok(unsafe { String::from_utf8_unchecked(v) }) - } - } - }; - () => { - def_triple_stringifier!(Writer, Stringifier); - }; -} - -/// This macro provides a straightforward implementation of the `Stringifier` type, -/// based on the `Writer` type, for quads. 
-/// -/// See also -/// [`def_triple_stringifier`](macro.def_triple_stringifier.html) -#[macro_export] -macro_rules! def_quad_stringifier { - ($writer: ident, $stringifier: ident) => { - /// A `QuadSink` returned by `Config::stringifier` - pub struct $stringifier { - writer: $writer>, - } - - impl $crate::serializer::QuadStringifier for $stringifier { - type Config = Config; - - fn new(config: Config) -> $stringifier { - $stringifier { - writer: $writer::new(Vec::new(), config), - } - } - } - - impl $crate::quad::stream::QuadSink for $stringifier { - type Outcome = String; - /// never returns an error - type Error = std::convert::Infallible; - - fn feed(&mut self, t: &T) -> std::result::Result<(), Self::Error> - where - T: $crate::quad::Quad, - { - self.writer.feed(t).map_err(|_| unreachable!()) - } - - fn finish(&mut self) -> std::result::Result { - let mut v = Vec::new(); - swap(&mut self.writer.write, &mut v); - Ok(unsafe { String::from_utf8_unchecked(v) }) - } - } - }; - () => { - def_quad_stringifier!(Writer, Stringifier); - }; -} - -#[cfg(test)] -mod test { - // The code from this module is tested through its use in other modules - // (especially the ::serializer::nt::test module). -} diff --git a/sophia/src/serializer/nq.rs b/sophia/src/serializer/nq.rs index 9caf5556..cbb83f84 100644 --- a/sophia/src/serializer/nq.rs +++ b/sophia/src/serializer/nq.rs @@ -9,12 +9,10 @@ //! [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html //! [`BufWriter`]: https://doc.rust-lang.org/std/io/struct.BufWriter.html -use std::io; - -use sophia_api::quad::{stream::*, Quad}; - use super::nt::write_term; -use super::*; +use sophia_api::quad::{stream::*, Quad}; +use sophia_api::serializer::*; +use std::io; /// N-Quads serializer configuration. #[derive(Clone, Debug, Default)] diff --git a/sophia/src/serializer/nt.rs b/sophia/src/serializer/nt.rs index e74c7baf..f7043878 100644 --- a/sophia/src/serializer/nt.rs +++ b/sophia/src/serializer/nt.rs @@ -10,11 +10,12 @@ //! 
[`BufWriter`]: https://doc.rust-lang.org/std/io/struct.BufWriter.html use sophia_api::ns::xsd; +use sophia_api::serializer::*; use sophia_api::term::{TTerm, TermKind}; +use sophia_api::triple::stream::{StreamResult, TripleSource}; +use sophia_api::triple::Triple; use std::io; -use super::*; - /// N-Triples serializer configuration. #[derive(Clone, Debug, Default)] pub struct NtConfig { From ef0da6b2d0827d9e1f284400b6fe39d4ec90aa06 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Champin Date: Thu, 16 Jul 2020 09:54:20 +0200 Subject: [PATCH 11/11] improve doc of 'sophia_api' --- api/src/dataset/test.rs | 6 +++--- api/src/graph/test.rs | 6 +++--- api/src/lib.rs | 35 +++++++++++++++++++++++++++++--- api/src/ns.rs | 6 ++++-- api/src/quad/streaming_mode.rs | 2 +- api/src/triple/streaming_mode.rs | 2 +- sophia/src/lib.rs | 22 ++++++++++++++++++++ 7 files changed, 66 insertions(+), 13 deletions(-) diff --git a/api/src/dataset/test.rs b/api/src/dataset/test.rs index 41bdc508..3889faa1 100644 --- a/api/src/dataset/test.rs +++ b/api/src/dataset/test.rs @@ -108,7 +108,7 @@ where println!(">>>>"); } -/// Generates a test suite for implementations of +/// Generate a test suite for an implementation of /// [`Dataset`], [`CollectibleDataset`] and [`MutableDataset`]. /// /// If your type only implements [`Dataset`] and [`CollectibleDataset`], @@ -841,8 +841,8 @@ macro_rules! test_dataset_impl { }; } -/// Generates a test suite for implementations of -/// [`Dataset`], [`CollectibleDataset`]. +/// Generate a test suite for an implementation of +/// [`Dataset`] and [`CollectibleDataset`]. /// /// If your type also implements [`MutableDataset`], /// you should use [`test_dataset_impl`] instead. 
diff --git a/api/src/graph/test.rs b/api/src/graph/test.rs index 35147eee..f8f02307 100644 --- a/api/src/graph/test.rs +++ b/api/src/graph/test.rs @@ -133,7 +133,7 @@ where assert!(collection.into_iter().any(|i| item == i)) } -/// Generates a test suite for implementations of +/// Generate a test suite for an implementation of /// [`Graph`], [`CollectibleGraph`] and [`MutableGraph`]. /// /// If your type only implements [`Graph`] and [`CollectibleGraph`], @@ -591,8 +591,8 @@ macro_rules! test_graph_impl { }; } -/// Generates a test suite for implementations of -/// [`Graph`], [`CollectibleGraph`]. +/// Generate a test suite for an implementation of +/// [`Graph`] and [`CollectibleGraph`]. /// /// If your type also implements [`MutableGraph`], /// you should use [`test_graph_impl`] instead. diff --git a/api/src/lib.rs b/api/src/lib.rs index 78216806..821e5147 100644 --- a/api/src/lib.rs +++ b/api/src/lib.rs @@ -1,7 +1,36 @@ -//! This crate aims to provide a comprehensive toolkit -//! for working with [RDF] and [Linked Data] in Rust. +//! This crate provides a foundation, +//! as a set of traits and core types, +//! for building interoperable implementations of [RDF] and [Linked Data] in Rust. //! -//! See https://docs.rs/sophia/latest/sophia/ +//! For an all-included crate +//! (providing actual implementations of the traits defined here), +//! see [`sophia`](https://docs.rs/sophia/latest/sophia/). +//! +//! # RDF +//! +//! [RDF] is a data model +//! designed to exchange knowledge on the Web +//! in an interoperable way. +//! Each piece of knowledge in RDF (a *statement*) +//! is represented by a [triple], made of three [term]s. +//! A set of [triple]s forms an RDF [graph]. +//! Finally, several [graph]s can be grouped in a collection +//! called a [dataset], where each [graph] is identified by a unique name. +//! +//! [RDF]: https://www.w3.org/TR/rdf-primer/ +//! [Linked Data]: http://linkeddata.org/ +//! [triple]: triple/index.html +//! 
[term]: term/index.html +//! [graph]: graph/index.html +//! [dataset]: dataset/index.html +//! +//! # Generalized vs. Strict RDF model +//! +//! The data model supported by this crate is in fact +//! a superset of the RDF data model as defined by the W3C. +//! When the distinction matters, +//! they will be called, respectively, +//! the *generalized* RDF model, and the *strict* RDF model. pub mod dataset; pub mod graph; diff --git a/api/src/ns.rs b/api/src/ns.rs index ee90da56..2a109dd3 100644 --- a/api/src/ns.rs +++ b/api/src/ns.rs @@ -103,11 +103,13 @@ impl> std::ops::Deref for Namespace { } } -/// Helper for creating a "namespace module" +/// Create a "namespace module" /// defining a set of terms within a given IRI space. /// /// # Tests /// This macro also create a test module to check that all created IRIs are valid. +/// +/// This allows to skip those checks at runtime, keeping the initialization of the namespace fast. #[macro_export] macro_rules! namespace { ($iri_prefix:expr, $($suffix:ident),*; $($r_id:ident, $r_sf:expr),*) => { @@ -145,7 +147,7 @@ macro_rules! namespace { }; } -/// Helper for creating a term in a "namespace module". +/// Create a term in a "namespace module". /// In general, you should use the [`namespace!`](macro.namespace.html) macro instead. /// /// # Safety diff --git a/api/src/quad/streaming_mode.rs b/api/src/quad/streaming_mode.rs index ecfd7af9..82f226e6 100644 --- a/api/src/quad/streaming_mode.rs +++ b/api/src/quad/streaming_mode.rs @@ -120,7 +120,7 @@ where } } -/// A macro for creating a [streaming mode] for lifetime-parameterized Quad types. +/// Create a [streaming mode] for lifetime-parameterized Quad types. 
/// /// This macro expects two identifiers: /// * the first one (`$mode`) will be the identifier of the streaming mode; diff --git a/api/src/triple/streaming_mode.rs b/api/src/triple/streaming_mode.rs index d617770a..38f90f38 100644 --- a/api/src/triple/streaming_mode.rs +++ b/api/src/triple/streaming_mode.rs @@ -180,7 +180,7 @@ where } } -/// A macro for creating a [streaming mode] for lifetime-parameterized Triple types. +/// Create a [streaming mode] for lifetime-parameterized Triple types. /// /// This macro expects two identifiers: /// * the first one (`$mode`) will be the identifier of the streaming mode; diff --git a/sophia/src/lib.rs b/sophia/src/lib.rs index b05392a8..fc9cd61e 100644 --- a/sophia/src/lib.rs +++ b/sophia/src/lib.rs @@ -65,19 +65,30 @@ pub mod query; +/// This module re-exports symbols from +/// [`sophia_api::dataset`](https://docs.rs/sophia_api/latest/sophia_api/dataset/), +/// and also provides some implementations of its traits. pub mod dataset { pub use sophia_api::dataset::*; pub mod indexed; pub mod inmem; } +/// This module re-exports symbols from +/// [`sophia_api::graph`](https://docs.rs/sophia_api/latest/sophia_api/graph/), +/// and also provides some implementations of its traits. pub mod graph { pub use sophia_api::graph::*; pub mod indexed; pub mod inmem; } +/// This module re-exports symbols from +/// [`sophia_api::ns`](https://docs.rs/sophia_api/latest/sophia_api/ns/). pub mod ns { pub use sophia_api::ns::*; } +/// This module re-exports symbols from +/// [`sophia_api::parser`](https://docs.rs/sophia_api/latest/sophia_api/parser/), +/// and also provides some implementations of its traits. pub mod parser { pub use sophia_api::parser::*; pub mod gtrig; @@ -89,17 +100,28 @@ pub mod parser { #[cfg(feature = "xml")] pub mod xml; } +/// This module re-exports symbols from +/// [`sophia_api::quad`](https://docs.rs/sophia_api/latest/sophia_api/quad/). 
pub mod quad { pub use sophia_api::quad::*; } +/// This module re-exports symbols from +/// [`sophia_api::serializer`](https://docs.rs/sophia_api/latest/sophia_api/serializer/), +/// and also provides some implementations of its traits. pub mod serializer { pub use sophia_api::serializer::*; pub mod nq; pub mod nt; } +/// This module re-exports symbols from +/// [`sophia_api::term`](https://docs.rs/sophia_api/latest/sophia_api/term/) +/// and +/// [`sophia_term`](https://docs.rs/sophia_term/latest/sophia_term/). pub mod term { pub use sophia_term::*; } +/// This module re-exports symbols from +/// [`sophia_api::triple`](https://docs.rs/sophia_api/latest/sophia_api/triple/). pub mod triple { pub use sophia_api::triple::*; }