
Merge branch 'master' into GH-2235-release-notes-3.3.0
Signed-off-by: Håvard Ottestad <hmottestad@gmail.com>
hmottestad committed Jul 24, 2020
2 parents 01280bd + a1903dc commit 595393f
Showing 14,766 changed files with 3,197,059 additions and 23,632 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
27 changes: 15 additions & 12 deletions site/config.toml
@@ -1,6 +1,6 @@
baseurl = "https://rdf4j.eclipse.org/"
baseurl = "https://rdf4j.org/"
DefaultContentLanguage = "en"
title = "Eclipse rdf4j"
title = "Eclipse RDF4J"
theme = "hugo-solstice-theme"
metaDataFormat = "yaml"
googleAnalytics = ""
@@ -10,7 +10,7 @@ pygmentsStyle = "github"

[Params]
google_tag_manager = ""
description = "Eclipse rdf4j: Scalable RDF for Java developers."
description = "Eclipse RDF4J: Scalable RDF for Java developers."
subtitle = "Eclipse Foundation"
seo_title_suffix = " | The Eclipse Foundation"
keywords = ["hugo", "solstice", "eclipse foundatiou"]
@@ -26,22 +26,25 @@ pygmentsStyle = "github"
# call_for_action_icon = "fa-download"

[Author]
name = "Christopher Guindon"
website = "https://www.eclipse.org"
email = "webdev@eclipse-foundation.org"
facebook = "eclipse.org"
twitter = "EclipseFdn"
youtube = "EclipseFdn"
googleplus = "+Eclipse"
linkedin = "company/eclipse-foundation/"
name = "Eclipse RDF4J developers"
website = "https://rdf4j.org/"

[taxonomies]
category = "categories"
author = "authors"

[permalinks]
news = "/:sections/:year/:month/:day/:slug/"

[[menu.main]]
name = "News"
url = "/news/"
weight = 1

[[menu.main]]
name = "About RDF4J"
url = "/about/"
weight = 1
weight = 2

[[menu.main]]
name = "Documentation"
1 change: 1 addition & 0 deletions site/content/_index.md
@@ -6,4 +6,5 @@ subtitle: ""
description: "a Java RDF/SPARQL framework"
tagline: "Scalable RDF for Java"
date: 2018-04-05T15:50:25-04:00
hide_page_title: true
---
45 changes: 28 additions & 17 deletions site/content/about.md
@@ -33,55 +33,66 @@ though for most usecases the NativeStore will be considerably faster.

On top of these core databases, RDF4J offers a number of functional extensions. These extensions add functionality such as improved full-text search, RDFS inferencing, rule-based reasoning and validation using SHACL/SPIN, and geospatial querying support. For more information see the [RDF4J documentation](/documentation).

The core databases are mainly intended for small to medium-sized datasets. However, RDF4J-compatible databases are developed by several third parties, both open-source/free and commercial, and they often offer better scalability or other extended features. Because these triplestores are compatible with the RDF4J APIs, you will be able to switch your project to a different database with a minimal amount of code changes.
## Third party database solutions

## Ontotext GraphDB
The core RDF4J databases are mainly intended for small to medium-sized datasets. However, RDF4J-compatible databases are developed by several third parties, both open-source/free and commercial, and they often offer better scalability or other extended features. Because these triplestores are compatible with the RDF4J APIs, you will be able to switch your project to a different database with a minimal amount of code changes. Here, we list a few options, in no particular order of preference.

<a href="http://www.ontotext.com/"><img src="/images/logos/ontotext.svg" alt="Ontotext" class="logo-vendor"></a>
[Ontotext GraphDB](http://www.ontotext.com/products/ontotext-graphdb/) is a leading RDF triplestore built on OWL (Ontology Web Language) standards. GraphDB handles massive loads, queries and OWL inferencing in real time. Ontotext offers GraphDB in several editions, including GraphDB™ Free, GraphDB™ Standard and GraphDB™ Enterprise. Since release 8, GraphDB is fully compatible with the RDF4J framework.
### Ontotext GraphDB

## Halyard
[Ontotext GraphDB](http://www.ontotext.com/products/ontotext-graphdb/) is a leading RDF triplestore built on OWL (Web Ontology Language) standards. GraphDB handles massive loads, queries and OWL inferencing in real time. Ontotext offers GraphDB in several editions, including GraphDB™ Free, GraphDB™ Standard and GraphDB™ Enterprise.

Ontotext are a long-term contributor to the RDF4J project.

### Halyard

[Halyard](https://merck.github.io/Halyard/) is an RDF4J-based horizontally scalable triplestore with full support for named graphs and SPARQL, implemented on top of Apache HBase.

## Stardog
### Stardog

<a href="http://www.stardog.com/"><img src="/images/logos/stardog.svg" alt="Stardog" class="logo-vendor"></a>
[Stardog](http://www.stardog.com/) is a fast, lightweight, pure Java RDF store for mission-critical apps. It supports highly scalable storage and retrieval as well as OWL reasoning.

## Amazon Neptune
### Amazon Neptune

[Amazone Neptune](https://aws.amazon.com/neptune/) is Amazon Neptune is a fast, reliable, fully managed graph database service that makes it easy to build and run applications that work with highly connected datasets.
[Amazon Neptune](https://aws.amazon.com/neptune/) is a fast, reliable, fully managed graph database service on Amazon Web Services (AWS) that makes it easy to build and run applications that work with highly connected datasets.

## Systap Blazegraph™
### Systap Blazegraph™

<a href="http://www.blazegraph.com/"><img src="/images/logos/blazegraph.png" alt="Blazegraph" height="100" class="logo-vendor"></a>
[Blazegraph](http://www.blazegraph.com/) (formerly known as Bigdata) is an enterprise graph database by Systap, LLC that provides a horizontally scaling storage and retrieval solution for very large volumes of RDF.

## MarkLogic RDF4J API
### MarkLogic RDF4J API

The [MarkLogic RDF4J API](https://github.com/marklogic/marklogic-rdf4j) is a full-featured, easy-to-use interface that provides access to the MarkLogic triplestore via the RDF4J APIs. It offers several additional features, such as permissions and combination queries. More details can be found in the [MarkLogic Developer documentation](https://docs.marklogic.com/guide/semantics/clientAPIs#id_23335).

## Strabon
### Strabon

[Strabon](http://www.strabon.di.uoa.gr/) is a spatiotemporal RDF store based on RDF4J. You can use it to store linked geospatial data that changes over time and pose queries using two popular extensions of SPARQL. Strabon supports spatial datatypes enabling the serialization of geometric objects in OGC standards WKT and GML. It also offers spatial and temporal selections, spatial and temporal joins, a rich set of spatial functions similar to those offered by geospatial relational database systems and support for multiple Coordinate Reference Systems. Strabon can be used to model temporal domains and concepts such as events, facts that change over time etc. through its support for valid time of triples, and a rich set of temporal functions.

## Openlink Virtuoso RDF4J Provider
### Openlink Virtuoso RDF4J Provider

The [Openlink Virtuoso RDF4J Provider](http://vos.openlinksw.com/owiki/wiki/VOS/VirtSesame2Provider) is a fully operational Native Graph Model Storage Provider for the Eclipse RDF4J Framework, allowing users of Virtuoso to leverage the Eclipse RDF4J framework to modify, query, and reason with the Virtuoso quad store using the Java language.

# Related projects

Several projects extend or make use of RDF4J in some way, and provide additional functionality on top of the core RDF4J framework.
Several projects extend or make use of RDF4J in some way, and provide additional functionality on top of the core RDF4J framework. Here, we offer a non-exhaustive list of such projects, both commercial and free/open-source.

## metaphactory

[metaphactory](https://www.metaphacts.com/product) supports knowledge graph management, rapid application development, and end-user oriented interaction. metaphactory runs on top of your on-premise, cloud, or managed graph database and offers capabilities and features to support the entire lifecycle of dealing with knowledge graphs. It is a commercial platform with RDF4J at its core.

The metaphactory platform is developed by [metaphacts GmbH](https://www.metaphacts.com/), who are a significant contributor to the RDF4J project.

## Neosemantics

[Neosemantics](https://neo4j.com/labs/neosemantics-rdf/) is a plugin that enables the use of RDF in Neo4j. You can use it to import existing RDF datasets, build integrations with RDF generating endpoints or easily construct RDF endpoints on Neo4j, and more.

## Other

- [Apache Marmotta](http://marmotta.apache.org/)<br>
a Linked Data publication platform.
- [Carml](https://github.com/carml/carml)<br>
a library that transforms structured sources to RDF, as declared in an RML mapping.
- [KOMMA](http://komma.enilink.net/)<br>
a framework for the management and editing of RDF, RDFS and OWL. It provides Object-Triple-Mapping (comparable to JPA), an Editing framework, Eclipse RCP and RAP integration, on top of Eclipse RDF4J.
- [Neosemantics](https://github.com/jbarrasa/neosemantics)<br>
Neo4j extension for importing RDF into Neo4j and exposing graphs as RDF.
- [RDF4J Schema Generator](https://github.com/ansell/rdf4j-schema-generator)<br>
a command line tool and maven plugin to generate vocabulary java classes from RDFS or OWL.
- [RML-Mapper](https://github.com/RMLio/RML-Mapper)<br>
3 changes: 3 additions & 0 deletions site/content/authors/havard/_index.md
@@ -0,0 +1,3 @@
---
name: "Håvard M. Ottestad"
---
5 changes: 5 additions & 0 deletions site/content/authors/jeen/_index.md
@@ -0,0 +1,5 @@
---
name: "Jeen Broekstra"
twitter: "@ABrokenJester"
---

7 changes: 4 additions & 3 deletions site/content/documentation/developer/squashing.md
@@ -2,18 +2,19 @@
title: "Squashing Commits"
---

When submitting a pull request to RDF4J, we sometimes ask that you squash your commits before we merge. Here we explain a simple way to do that.
When submitting a pull request to RDF4J, we sometimes ask that you squash your commits, either so you can clean up the commit history a bit, or for example when some of your commits weren't correctly signed off. Here we explain a simple way to do that.
<!--more-->

On the command line, the process is as follows:

1. Make sure your local _master_ and _develop_ branches are up to date with the upstream.
2. Check out your pull request branch.
3. Run `git rebase -i master` (or `git rebase -i develop` if your PR is against the _develop_ branch).
3. Run `git rebase -i master --signoff` (or `git rebase -i develop --signoff` if your branch started from the _develop_ branch).
The `--signoff` flag here makes sure that the new commit produced by the squash operation is correctly signed off.
You should see a list of commits, each commit starting with the word `pick`.
Make sure the first commit says "pick" and change the rest from "pick" to "squash".
4. Save and close the editor.
It will give you the opportunity to change the commit message.
It will give you the opportunity to change the commit message.
5. Save and close the editor again.
Then you have to force push the final, squashed commit: `git push --force-with-lease origin`.
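The command-line steps above can be exercised end-to-end without an interactive editor. The sketch below builds a disposable repository with two commits on a feature branch and squashes them by letting `GIT_SEQUENCE_EDITOR` rewrite the rebase todo list; the branch names, file name, and commit messages are invented for the demonstration, and GNU `sed` is assumed:

```shell
# Build a throwaway repository to demonstrate the squash workflow.
set -e
demo=$(mktemp -d)
cd "$demo"
git init -q
git symbolic-ref HEAD refs/heads/master        # use "master" as in the steps above
git config user.name "Demo" && git config user.email "demo@example.com"

# master: a single base commit
echo base > file.txt
git add file.txt
git commit -q -m "base"

# feature branch: two commits we want squashed into one
git checkout -q -b feature
echo one >> file.txt && git commit -q -am "first change"
echo two >> file.txt && git commit -q -am "second change"

# Step 3 without an editor: rewrite every "pick" after the first line to
# "squash"; GIT_EDITOR=true accepts the default combined commit message.
GIT_SEQUENCE_EDITOR="sed -i '2,\$s/^pick/squash/'" GIT_EDITOR=true \
    git rebase -q -i master --signoff

git rev-list --count master..feature   # a single squashed, signed-off commit
```

`git push --force-with-lease` is the safer choice for the final push because it refuses to overwrite remote commits you have not yet fetched.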

1 change: 1 addition & 0 deletions site/content/documentation/programming/rio.md
@@ -1,6 +1,7 @@
---
title: "Parsing and Writing RDF with Rio"
weight: 4
toc: true
---
The RDF4J framework includes a set of parsers and writers for RDF called Rio. Rio (“RDF I/O”) is a toolkit that can be used independently from the rest of RDF4J.
<!--more-->
96 changes: 56 additions & 40 deletions site/content/documentation/programming/shacl.md
@@ -92,6 +92,7 @@ As of writing this documentation the following features are supported.
- `sh:targetSubjectsOf`
- `sh:targetObjectsOf`
- `sh:path`
- `sh:inversePath`
- `sh:property`
- `sh:or`
- `sh:and`
@@ -113,9 +114,12 @@ As of writing this documentation the following features are supported.
- `sh:in`
- `sh:deactivated`

Implicit `sh:targetClass` is supported for nodes that are `rdfs:Class` and either of `sh:PropertyShape` or `sh:NodeShape`. Validation for all nodes, equivalent to `owl:Thing` or `rdfs:Resource` in an environment with a reasoner, can be enabled by setting `setUndefinedTargetValidatesAllSubjects(true)`.
Implicit `sh:targetClass` is supported for nodes that are `rdfs:Class` and either of `sh:PropertyShape` or `sh:NodeShape`. Validation for all nodes,
equivalent to `owl:Thing` or `rdfs:Resource` in an environment with a reasoner, can be enabled by setting `setUndefinedTargetValidatesAllSubjects(true)`.

`sh:path` is limited to single predicate paths, eg. `ex:age`. Sequence paths, alternative paths, inverse paths and the like are not supported.
`sh:path` is limited to single predicate paths, eg. `ex:age` or a single inverse path. Sequence paths, alternative paths and the like are not supported.

Nested `sh:property` is not supported.
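For illustration, a hypothetical shape using only the supported path forms — a single predicate path and a single inverse path (the `ex:` names are invented for this example):

```turtle
@prefix sh:  <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix ex:  <http://example.com/ns#> .

ex:PersonShape
    a sh:NodeShape ;
    sh:targetClass ex:Person ;
    sh:property [
        sh:path ex:age ;                       # supported: single predicate path
        sh:datatype xsd:integer ;
    ] ;
    sh:property [
        sh:path [ sh:inversePath ex:knows ] ;  # supported: single inverse path
        sh:minCount 1 ;
    ] .
```

A sequence path such as `sh:path ( ex:knows ex:age )` or an alternative path would not be accepted.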

# Validation results

@@ -143,8 +147,10 @@ The `validationReportModel` follows the report format specified by the W3C SHACL
[]
a sh:ValidationReport ;
sh:conforms false ;
rdf4j:truncated false;
sh:result [
a sh:ValidationResult ;
sh:value "eighteen";
sh:focusNode <http://example.com/ns#pete> ;
sh:resultPath <http://example.com/ns#age> ;
sh:sourceConstraintComponent sh:DatatypeConstraintComponent ;
Expand All @@ -156,6 +162,22 @@ The `ValidationReport` class provides the same information as the validationRepo

There is no support for `sh:severity`, all violations will trigger an exception.

## Limiting the validation report

Large validation reports take time to generate and can use large amounts of memory.
Limiting the size of the report can be useful to speed up validation and to reduce the number of similar violations.

Limitations can either be configured directly in the ShaclSail or through the configuration files.

- `setValidationResultsLimitTotal(1000)` limits the total number of validation results per report to 1000.
- `<http://rdf4j.org/config/sail/shacl#validationResultsLimitTotal>`
- `setValidationResultsLimitPerConstraint(10)` limits the number of validation results per constraint component to 10.
- `<http://rdf4j.org/config/sail/shacl#validationResultsLimitPerConstraint>`

Use -1 to remove a limit and 0 to validate but return an empty validation report. -1 is the default.

A truncated validation report will have `isTruncated()` return true and the model will have `rdf4j:truncated true`.
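As a sketch of the file-based variant, the two limits could be expressed with the properties listed above in a repository configuration file. The surrounding sail configuration is omitted here, and the exact node these properties attach to is an assumption — check a generated configuration file for your setup:

```turtle
@prefix shacl-config: <http://rdf4j.org/config/sail/shacl#> .

# Fragment of a ShaclSail repository configuration; [] stands for the
# sail implementation node, whose other settings are omitted.
[] shacl-config:validationResultsLimitTotal 1000 ;
   shacl-config:validationResultsLimitPerConstraint 10 .
```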

## Retrieving violated shapes

Since all shapes are stored in the SHACL shapes graph, the actual shape that was violated can be retrieved from the
@@ -235,50 +257,45 @@ Parallel validation further increases performance. This can be disabled with `se
The initial commit to an empty ShaclSail is further optimized if the underlying sail is a MemoryStore.

Some workloads will not fit in memory and need to be validated while stored on disk. This can be achieved by using a
NativeStore and temporarily disabling the SHACL validation while loading data. After loading data there is a special
method to trigger a full validation against your shapes. The process is illustrated in the following example:
NativeStore and using the new transaction settings introduced in 3.3.0.

- `ShaclSail.TransactionSettings.ValidationApproach.Auto`: Let the ShaclSail choose the best approach.
- `ShaclSail.TransactionSettings.ValidationApproach.Bulk`: Optimized for large transactions, disables caching and parallel validation and runs a full validation step at the end of the transaction.
- `ShaclSail.TransactionSettings.ValidationApproach.Disabled`: Disable validation.

Disabling validation for a transaction may leave your data in an invalid state. Running a transaction with bulk validation will force a full validation.
This is a useful approach if you need to use multiple transactions to bulk load your data.

{{< highlight java >}}
ShaclSail shaclSail = new ShaclSail(new NativeStore(new File(...), "spoc,ospc,psoc"));

// significantly reduce required memory
shaclSail.setCacheSelectNodes(false);

// further reduce required memory by not running validation in parallel
shaclSail.setParallelValidation(false);

SailRepository sailRepository = new SailRepository(shaclSail);

shaclSail.disableValidation();

try (SailRepositoryConnection connection = sailRepository.getConnection()) {
// load shapes
connection.begin(IsolationLevels.NONE);
try (InputStream inputStream = new FileInputStream("shacl.ttl")) {
connection.add(inputStream, "", RDFFormat.TURTLE, RDF4J.SHACL_SHAPE_GRAPH);
}
connection.commit();

// load data
connection.begin(IsolationLevels.NONE);
try (InputStream inputStream = new BufferedInputStream(new FileInputStream("data.ttl"))) {
connection.add(inputStream, "", RDFFormat.TURTLE);
}
connection.commit();
}
shaclSail.enableValidation();

try (SailRepositoryConnection connection = sailRepository.getConnection()) {
connection.begin(IsolationLevels.NONE);
ValidationReport revalidate = ((ShaclSailConnection) connection.getSailConnection()).revalidate();
connection.commit();

if (!revalidate.conforms()) {
Rio.write(revalidate.asModel(), System.out, RDFFormat.TURTLE);
}


connection.begin(IsolationLevels.NONE, ShaclSail.TransactionSettings.ValidationApproach.Bulk);

// load shapes
try (InputStream inputStream = new FileInputStream("shacl.ttl")) {
connection.add(inputStream, "", RDFFormat.TURTLE, RDF4J.SHACL_SHAPE_GRAPH);
}

// load data
try (InputStream inputStream = new BufferedInputStream(new FileInputStream("data.ttl"))) {
connection.add(inputStream, "", RDFFormat.TURTLE);
}

// commit transaction and catch any exception
try {
connection.commit();
} catch (RepositoryException e){
if(e.getCause() instanceof ValidationException){
Model model = ((ValidationException) e.getCause()).validationReportAsModel();
Rio.write(model, System.out, RDFFormat.TURTLE);
}
}

}

sailRepository.shutDown();
{{< / highlight >}}

@@ -455,4 +472,3 @@ Here are some useful links to learn more about SHACL:
- [W3C SHACL specification](http://www.w3.org/TR/shacl/)
- [Validating RDF Data](http://book.validatingrdf.com/) (various authors)


4 changes: 2 additions & 2 deletions site/content/documentation/tools/server-workbench.md
@@ -380,7 +380,7 @@ Shapes need to be loaded into the following context:

<http://rdf4j.org/schema/rdf4j#SHACLShapeGraph>

<img src="images/loadShapes.png" alt="Loading shapes" class="img-responsive"/>
<img src="../images/loadShapes.png" alt="Loading shapes" class="img-responsive"/>

This context is a hidden context that is only available through the following commands:

@@ -404,7 +404,7 @@ want to use is `1`, you can use the following URL to download your shapes as RDF
All transactions are validated before being committed. A validation error when uploading data in
the workbench looks like this:

<img src="images/shaclValidationError.png" alt="Validation error" class="img-responsive"/>
<img src="../images/shaclValidationError.png" alt="Validation error" class="img-responsive"/>

Your data will only be committed if it passes validation.
