From e27746438ff79e686f6aed45d6e08d6411efab6c Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Thu, 17 Feb 2022 17:18:40 +0000 Subject: [PATCH 1/3] Add API support for java.nio.Path types This means you can via the clojure API resolve CSVW files inside zips without having to extract them. --- src/csv2rdf/source.clj | 19 ++++++++++++++++--- test/csv2rdf/csvw_test.clj | 15 +++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 test/csv2rdf/csvw_test.clj diff --git a/src/csv2rdf/source.clj b/src/csv2rdf/source.clj index 3b48792..f559930 100644 --- a/src/csv2rdf/source.clj +++ b/src/csv2rdf/source.clj @@ -7,7 +7,8 @@ [clojure.data.json :as json] [clojure.string :as string]) (:import [java.net URI] - [java.io File InputStream ByteArrayInputStream])) + [java.io File InputStream ByteArrayInputStream] + [java.nio.file Files Path OpenOption StandardOpenOption])) (defprotocol URIable "Represents an object with an associated URI." @@ -18,7 +19,11 @@ (->uri [file] (.toURI file)) URI - (->uri [uri] uri)) + (->uri [uri] uri) + + Path + (->uri [p] + (.toUri p))) (defprotocol JSONSource "Protocol for loading a JSON map from a given source" @@ -58,7 +63,11 @@ (get-json [f] (read-json f)) String - (get-json [s] (json/read-str s))) + (get-json [s] (json/read-str s)) + + Path + (get-json [p] + (read-json (Files/newInputStream p (into-array OpenOption [StandardOpenOption/READ]))))) (defrecord MapMetadataSource [uri json] URIable @@ -111,6 +120,10 @@ (defmethod request-uri-input-stream :file [uri] {:headers {} :stream (io/input-stream uri)}) +;; support reading files in a zip (via API) via the jar protocol +(defmethod request-uri-input-stream :jar [uri] + {:headers {} :stream (io/input-stream uri)}) + (extend-protocol InputStreamRequestable File (request-input-stream [f] {:headers {} diff --git a/test/csv2rdf/csvw_test.clj b/test/csv2rdf/csvw_test.clj new file mode 100644 index 0000000..5aaf5cb --- /dev/null +++ b/test/csv2rdf/csvw_test.clj @@ -0,0 +1,15 
@@ +(ns csv2rdf.csvw-test + (:require [clojure.test :refer [deftest is]] + [clojure.java.io :as io] + [csv2rdf.csvw :as csvw]) + (:import [java.net URI] + [java.nio.file FileSystems] + [grafter_2.rdf.protocols Quad])) + +(defn- make-path [& path] + (.getPath (FileSystems/getDefault) "w3c-csvw" (into-array String path))) + +(deftest csv->rdf-with-paths + (is (instance? Quad + (first (csvw/csv->rdf (make-path "tests" "test011" "tree-ops.csv") + (make-path "tests" "test011" "tree-ops.csv-metadata.json")))))) From cec0eaab8fc9152cc8e8e8e294e39d487cd22bf3 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Thu, 17 Feb 2022 17:19:31 +0000 Subject: [PATCH 2/3] Improve docstring for csv->rdf function --- src/csv2rdf/csvw.clj | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/src/csv2rdf/csvw.clj b/src/csv2rdf/csvw.clj index 500cb5c..dd81de1 100644 --- a/src/csv2rdf/csvw.clj +++ b/src/csv2rdf/csvw.clj @@ -19,11 +19,42 @@ (table-statements context table annotated-rows))) (defn csv->rdf - "Runs the CSVW process for the given tabular or metadata data sources and options. If metadata-source - is non-nil then processing will start from the asscociated metadata document, otherwise it will start - from tabular-source. Returns a lazy sequence of statements containing the CSVW output for the specified - CSVW mode. Mode can be specified by the :mode key of the options map if provided, otherwise standard mode - will be used." + "Runs the CSVW process for the given tabular or metadata data sources + and options. + + `tabular-source` and `metadata-source` can be any of the following + types: + + - java.io.File + - java.lang.String + - java.net.URI + - java.nio.file.Path (including nio Paths that are inside zip filesystems) + + If metadata-source is non-nil then processing will start from the + associated metadata document, otherwise it will start from + tabular-source. 
Returns a lazy sequence of statements containing the + CSVW output for the specified CSVW mode. + + The processing mode can be specified by the :mode key of the options + map if provided, otherwise `:standard` mode will be used. Valid + `:mode` options are: + + - `:standard` this mode corresponds to the standard mode specified + in the \"Generating RDF from Tabular Data on the Web\" specification. + + It outputs triples for all information gleaned from the cells of the + tabular data with details of the rows, tables, and table groups. + + This mode yields the most data. + + - `:minimal` this mode corresponds to the minimal mode specified in + the \"Generating RDF from Tabular Data on the Web\" specification. + + It essentially yields the salient RDF; but omits the tabular structure. + + - `:annotated` a custom mode, not part of the standard, which is + like `:minimal`, but it also includes RDF data from the CSVW metadata + json file." ([tabular-source metadata-source] (csv->rdf tabular-source metadata-source {})) ([tabular-source metadata-source {:keys [mode] :as options}] (let [mode (or mode :standard) From 25929b2c4750162cff1b1f9d60090df20a5f3875 Mon Sep 17 00:00:00 2001 From: Rick Moynihan Date: Mon, 21 Feb 2022 11:09:13 +0000 Subject: [PATCH 3/3] Add tests for all supported types to csvw/csv->rdf --- test/csv2rdf/csvw_test.clj | 41 ++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/test/csv2rdf/csvw_test.clj b/test/csv2rdf/csvw_test.clj index 5aaf5cb..52f9201 100644 --- a/test/csv2rdf/csvw_test.clj +++ b/test/csv2rdf/csvw_test.clj @@ -1,15 +1,44 @@ (ns csv2rdf.csvw-test - (:require [clojure.test :refer [deftest is]] + (:require [clojure.test :refer [deftest is testing]] [clojure.java.io :as io] [csv2rdf.csvw :as csvw]) (:import [java.net URI] [java.nio.file FileSystems] [grafter_2.rdf.protocols Quad])) +(def w3c-dir "w3c-csvw") + (defn- make-path [& path] - (.getPath (FileSystems/getDefault) "w3c-csvw" 
(into-array String path))) + (.getPath (FileSystems/getDefault) w3c-dir (into-array String path))) + +(defn make-file [& path] + (apply io/file w3c-dir path)) + +(defn csv->rdf? [csv metadata] + (instance? Quad + (first (csvw/csv->rdf csv metadata)))) + +(deftest csv->rdf-supported-types-test + (testing "with java.io.File" + (let [csv (make-file "./tests" "test011" "tree-ops.csv") + metadata (make-file "./tests" "test011" "tree-ops.csv-metadata.json")] + + (is (csv->rdf? csv metadata)) + (is (csv->rdf? nil metadata) + "Resolves csv"))) + + (testing "with URI" + (let [csv (.toURI (make-file "./tests" "test011" "tree-ops.csv")) + metadata (.toURI (make-file "./tests" "test011" "tree-ops.csv-metadata.json"))] + + (is (csv->rdf? csv metadata)) + (is (csv->rdf? nil metadata) + "Resolves csv"))) + + (testing "with java.nio.Path" + (let [csv (make-path "tests" "test011" "tree-ops.csv") + metadata (make-path "tests" "test011" "tree-ops.csv-metadata.json")] -(deftest csv->rdf-with-paths - (is (instance? Quad - (first (csvw/csv->rdf (make-path "tests" "test011" "tree-ops.csv") - (make-path "tests" "test011" "tree-ops.csv-metadata.json")))))) + (is (csv->rdf? csv metadata)) + (is (csv->rdf? nil metadata) + "Resolves csv"))))