From 1502862aaad172805b848df291649e8e2f34fc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Wed, 19 Jul 2023 12:41:22 +0200 Subject: [PATCH 1/7] KBI0028: Start writing down notes on Sciebo/Nextcloud share URLs --- kbi/0028/index.rst | 94 ++++++++++++++++++++++++++++++++++++++++++ kbi/0028/list_files.py | 37 +++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 kbi/0028/index.rst create mode 100644 kbi/0028/list_files.py diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst new file mode 100644 index 0000000..a7df4af --- /dev/null +++ b/kbi/0028/index.rst @@ -0,0 +1,94 @@ +.. index:: + single: ; + +KBI0028: Create a DataLad dataset from Nextcloud (Sciebo) public share links +============================================================================ + +:authors: Name +:discussion: +:keywords: comma-separated list, aids, discoverability +:software-versions: _, ... (datalad or other version(s) used when crafting the KBI) + +A DataLad dataset can be created directly from an existing collection +of files in a cloud storage, using share URLs to provide file +access. Nextcloud, and Nextcloud-based regional university service +Sciebo, are examples of cloud storage which allows generation of +folder share URLs with optional password protection. These can be use +to share data with managed permissions (password or named user), where +DataLad access is optional. + +This document extends KBI0007 in two areas: it introduces the uncurl +special remote for URL rewriting and credentials access, and focuses +on Nextcloud-specific URL patterns. + +This document deals specifically with files that were deposited in +Nextcloud without using DataLad. For publishing DataLad datasets to +Nextcloud, see documentation of the create sibbling webdav command. + +Nextcloud URL patterns +---------------------- + +There are three primary ways in which a Nextcloud folder can be shared. These will determine the URL patterns which can be used. 
+ +Public share link, no password +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the sharing link is password protected, the URL above would not work, as it would redirect requests to a login page. In this case, as well as for sharing with named users only, WebDAV access can be used instead. + +Named user share +^^^^^^^^^^^^^^^^ + +If a folder is shared with a named user, they will see it in their own account like any other folder, so in principle their access and owner access would be the same, and use an URL starting with: + +.. code-block:: none + + https://example.com/nextcloud/remote.php/dav/files/USERNAME/ + +However, with Nextcloud (Sciebo) being a federated service, each user +may have a different instance URL to access their data. Additionally, +the URL includes the username, and each user may place the shared +directory in a different place within their home directory. + +Public share, password protected +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For a folder shared with a password-protected link, the access URLs would start with: + +.. code-block:: none + + https://example.com/nextcloud/public.php/webdav + +The share token needs to be provided as username, and the (optional) share password as password -- as request credentials, not part of the URL. + +URL pattern - summary +^^^^^^^^^^^^^^^^^^^^^ + +For the WebDAV url, it is useful to represent the share URL as the list of the following components: + +.. code-block:: none + + https:///// + +where: + +* ```` is either ``remote.php/dav/files/USERNAME/`` or + ``public.php/webdav`` +* ```` is the path to the shared folder in a given user's + home directory (none for public shares) +* ```` is the path to a particular file relative to the + shared folder (````) + +Listing files +------------- + +For generating the dataset, a list of file names (relative paths) and +their respective URLs is needed. These can be generated automatically, e.g. with the webdav4 and fsspec Python libraries. 
+ +An example script is given below, using inline comments for explanations. + +The example assumes that user's webdav credentials are already known to DataLad under the name ``sciebo`` (if not, these can be added with ``datalad credentials add``, or provided to the script in a different way). + +.. literalinclude:: list_files.py + :language: python + diff --git a/kbi/0028/list_files.py b/kbi/0028/list_files.py new file mode 100644 index 0000000..fbd68f7 --- /dev/null +++ b/kbi/0028/list_files.py @@ -0,0 +1,37 @@ +import csv +from pathlib import PurePosixPath + +from webdav4.fsspec import WebdavFileSystem +from datalad.api import credentials + +cred = credentials( + "get", + name="webdav-mycred", + return_type="item-or-list", +) +fs = WebdavFileSystem( + "https://example.com/nextcloud/remote.php/dav/files/USERNAME/", + auth=(cred["cred_user"], cred["cred_secret"]), +) + +with open("listing.txt", "wt") as urlfile: + writer = csv.writer(urlfile, delimiter="\t") + writer.writerow(["name", "href"]) + + for dirpath, dirinfo, fileinfo in fs.walk("test-sharing/example2", detail=True): + # fileinfo is a dict, with file names as keys, + # and dicts with actual file info as values; + # we need path ({"name": "..."}) + # and URL component ({"href": "remote.php/dav/..."}) + for f in fileinfo.values(): + name = f["name"] + href = f["href"] + + # reported path is relative to root of fs object, + # what we need is relative to the directory that we walk + relpath = PurePosixPath(name).relative_to("test-sharing/example2") + + writer.writerow((relpath, href)) + +# uncurl pattern - may need to adjust slashes a little +# "(?Phttps://[^/]+)/(?Premote\.php/dav/files/[^/]+|public\.php/webdav)/(?Ptest-sharing/example2/)(?P.*)"gm From 5338cfb957f9656e1ab69a3d670a2fe84b0185a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Wed, 19 Jul 2023 15:39:56 +0200 Subject: [PATCH 2/7] Cross-link kbi 0007 --- kbi/0007/index.rst | 2 ++ kbi/0028/index.rst | 6 +++--- 2 files 
changed, 5 insertions(+), 3 deletions(-) diff --git a/kbi/0007/index.rst b/kbi/0007/index.rst index 91d1db7..b3dccc6 100644 --- a/kbi/0007/index.rst +++ b/kbi/0007/index.rst @@ -1,6 +1,8 @@ .. index:: single: datalad; addurls +.. _kbi0007: + KBI0007: Create a DataLad dataset from a published collection of files ====================================================================== diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst index a7df4af..9a144e7 100644 --- a/kbi/0028/index.rst +++ b/kbi/0028/index.rst @@ -17,9 +17,9 @@ folder share URLs with optional password protection. These can be use to share data with managed permissions (password or named user), where DataLad access is optional. -This document extends KBI0007 in two areas: it introduces the uncurl -special remote for URL rewriting and credentials access, and focuses -on Nextcloud-specific URL patterns. +This document extends :ref:`KBI0007` in two areas: it introduces the +uncurl special remote for URL rewriting and credentials access, and +focuses on Nextcloud-specific URL patterns. This document deals specifically with files that were deposited in Nextcloud without using DataLad. For publishing DataLad datasets to From 41f4141da93f86e6715b5209f6f269e90d91690e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Wed, 19 Jul 2023 17:13:11 +0200 Subject: [PATCH 3/7] 0028: Finish write-up on Nextcloud (sciebo) share urls --- kbi/0028/index.rst | 173 ++++++++++++++++++++++++++++++++++------- kbi/0028/list_files.py | 22 +++--- 2 files changed, 157 insertions(+), 38 deletions(-) diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst index 9a144e7..4867fb1 100644 --- a/kbi/0028/index.rst +++ b/kbi/0028/index.rst @@ -1,44 +1,69 @@ .. 
index:: - single: ; + single: datalad; addurls + single: special remote; uncurl KBI0028: Create a DataLad dataset from Nextcloud (Sciebo) public share links ============================================================================ -:authors: Name -:discussion: -:keywords: comma-separated list, aids, discoverability -:software-versions: _, ... (datalad or other version(s) used when crafting the KBI) +:authors: Michał Szczepanik +:discussion: https://github.com/psychoinformatics-de/knowledge-base/pull/104 +:keywords: nextcloud, sciebo, webdav, sharing, addurls +:software-versions: datalad_0.19.2, datalad-next_1.0.0b3, webdav4_0.9.8, fsspec_2023.6.0, sciebo_10.12.2 A DataLad dataset can be created directly from an existing collection of files in a cloud storage, using share URLs to provide file -access. Nextcloud, and Nextcloud-based regional university service -Sciebo, are examples of cloud storage which allows generation of -folder share URLs with optional password protection. These can be use -to share data with managed permissions (password or named user), where -DataLad access is optional. - -This document extends :ref:`KBI0007` in two areas: it introduces the -uncurl special remote for URL rewriting and credentials access, and -focuses on Nextcloud-specific URL patterns. +access. `Nextcloud`_ storage platform (and, by extension, `Sciebo`_, a +Nextcloud-based regional university service) allows generation of +folder share URLs with optional password protection and expiration +time. Creating such share links, as well as granting access to +specific Nextcloud users, is an option for sharing data with managed +permissions. In such a use case, DataLad is an optional method of +accessing and indexing data. This document deals specifically with files that were deposited in Nextcloud without using DataLad. For publishing DataLad datasets to -Nextcloud, see documentation of the create sibbling webdav command. 
+Nextcloud, see the documentation of DataLad-next's +`create-sibling-webdav`_ command instead. + +This document extends :ref:`KBI0007` in two areas: it introduces the +`uncurl`_ special remote for transforming URLs and using credentials, +and focuses on Nextcloud-specific URL patterns. + +.. _nextcloud: https://nextcloud.com/ +.. _sciebo: https://hochschulcloud.nrw/ +.. _create-sibling-webdav: https://docs.datalad.org/projects/next/en/latest/generated/man/datalad-create-sibling-webdav.html +.. _uncurl: https://docs.datalad.org/projects/next/en/latest/generated/datalad_next.annexremotes.uncurl.html#module-datalad_next.annexremotes.uncurl Nextcloud URL patterns ---------------------- -There are three primary ways in which a Nextcloud folder can be shared. These will determine the URL patterns which can be used. +There are three primary ways in which a Nextcloud folder can be +shared. These will determine the URL patterns which can be used. Public share link, no password ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If the sharing link is password protected, the URL above would not work, as it would redirect requests to a login page. In this case, as well as for sharing with named users only, WebDAV access can be used instead. +In a special (and simplest) case, if the sharing link for a folder is +created without password protection, links to individual files can be +created by appending ``/download?path=<path>&files=<name>`` (where +``path`` is a relative path to a directory, and ``name`` is the file +name). However, if the sharing link is password protected, such URL +would not work, as it would redirect to a login page (html document) +and not to file content. + +In a general case (share links with or without password, as well as +sharing with named users), `Nextcloud's webdav access`_ can be +used. The remainder of the document only covers WebDAV URLs. + +.. 
_nextcloud's webdav access: https://docs.nextcloud.com/server/20/user_manual/en/files/access_webdav.html Named user share ^^^^^^^^^^^^^^^^ -If a folder is shared with a named user, they will see it in their own account like any other folder, so in principle their access and owner access would be the same, and use an URL starting with: +If a folder is shared with a named user, they will see it in their own +account like any other folder. In principle, access for a share +recipient would be analogous to that of an owner, and use an URL +starting with: .. code-block:: none @@ -52,43 +77,133 @@ directory in a different place within their home directory. Public share, password protected ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For a folder shared with a password-protected link, the access URLs would start with: +For a folder shared with a password-protected link, the access URLs +would start with: .. code-block:: none https://example.com/nextcloud/public.php/webdav -The share token needs to be provided as username, and the (optional) share password as password -- as request credentials, not part of the URL. +The share token (part of the share link) needs to be provided as +username, and the (optional) share password as password. Note that +these are sent as request credentials, and are not included in the +URL. URL pattern - summary ^^^^^^^^^^^^^^^^^^^^^ -For the WebDAV url, it is useful to represent the share URL as the list of the following components: +In summary, it is useful to represent the WebDAV URL as a combination +of the following components: .. 
code-block:: none - https:///// + /// where: -* ```` is the instance URL + (``https://example.com/nextcloud/`` in given examples) * ```` is either ``remote.php/dav/files/USERNAME/`` or ``public.php/webdav`` -* ```` is the path to the shared folder in a given user's - home directory (none for public shares) -* ```` is the path to a particular file relative to the - shared folder (````) +* ```` is the path to the shared folder in user's home + directory (none for public shares) +* ```` is the path to a particular file relative to the + shared folder (````) Listing files ------------- For generating the dataset, a list of file names (relative paths) and -their respective URLs is needed. These can be generated automatically, e.g. with the webdav4 and fsspec Python libraries. +their respective URLs is needed. These can be generated automatically, +e.g. with the `webdav4`_ and `fsspec`_ Python libraries. An example script is given below, using inline comments for explanations. -The example assumes that user's webdav credentials are already known to DataLad under the name ``sciebo`` (if not, these can be added with ``datalad credentials add``, or provided to the script in a different way). +The example assumes that user's webdav credentials are already known +to DataLad under the name ``webdav-mycred`` (if not, these can be +added with ``datalad credentials add``, or provided to the script in a +different way). + +.. _webdav4: https://pypi.org/project/webdav4/ +.. _fsspec: https://pypi.org/project/fsspec/ .. literalinclude:: list_files.py :language: python +This would produce the following csv file: + +.. code-block:: none + + name,href + file1.dat,/remote.php/dav/files/USERNAME/sharing/example/file1.dat + foo/file2.dat,/remote.php/dav/files/USERNAME/sharing/example/foo/file2.dat + ... + +Uncurl remote +------------- + +Download URLs are handled by special remotes. 
The uncurl remote, +available in DataLad-next extension, provides both the ability to +reconfigure URLs and access to DataLad-next's credential workflow. It +can be initialized as follows (optionally with ``autoenable=true``): + +.. code-block:: none + + git annex initremote uncurl type=external externaltype=uncurl encryption=none + +With a known URL pattern (see above), a match expression can be +defined upfront. The regular expression below is relatively generic, +with only the dirpath being specific to the given example. Websites +like `regex101`_ can be helpful in building and understanding the +expression: + +.. code-block:: none + + git annex enableremote uncurl match="(?Phttps://[^/]+)/(?Premote\.php/dav/files/[^/]+|public\.php/webdav)/(?Psharing/example)/(?P.*)" + +The dataset is created based on the previously generated tabular file +with ``datalad addurls``: + +.. code-block:: none + + datalad addurls listing.txt https://example.com/nextcloud{href} {name} + +.. _regex101: https://regex101.com + +Transforming URLs +----------------- + +Assuming the same user moves the folder in their Nextcloud account to +``some/other/place/``, the URL configuration can use all the defined +parts with only ``dirpath`` being different: + +.. code-block:: none + + git annex enableremote uncurl url='{instance}/{accesspath}/some/other/place/{filepath}' + +A different user with whom the dataset is shared would have to +additionally replace ``accesspath``, and (possibly) ``instance``. + +A user with whom the access was shared via a link would need to change +``accesspath``, and would not be using ``dirpath``: + +.. code-block:: none + + git annex enableremote uncurl url='{instance}/public.php/webdav/{filepath}' + +Credential caveats +------------------ + +Regardless of whether the files are accessed via the +``remote.php/dav/files/USERNAME/`` or ``public.php/webdav`` path, the +authentication realm for the given nextcloud instance is the +same. 
This means users who already have DataLad credentials saved for +the given realm would be see their requests for password-protected +links refused. As long as ``get`` does not support explicit +credentials, this can be worked around by unsetting the credential +realm. + +If a share link is not password protected, the webdav access via +``public.php/webdav`` can still be used. However, this requires +creating a DataLad credential with the token as username, and a +nonempty password (e.g. a single space or ``xyz``) that would not be used. diff --git a/kbi/0028/list_files.py b/kbi/0028/list_files.py index fbd68f7..5beb36c 100644 --- a/kbi/0028/list_files.py +++ b/kbi/0028/list_files.py @@ -1,24 +1,31 @@ import csv from pathlib import PurePosixPath -from webdav4.fsspec import WebdavFileSystem from datalad.api import credentials +from webdav4.fsspec import WebdavFileSystem +# Retrieve Nextcloud credentials from DataLad cred = credentials( "get", name="webdav-mycred", return_type="item-or-list", ) + +# Create a fsspec filesystem object, with user's Nextcloud home as root fs = WebdavFileSystem( "https://example.com/nextcloud/remote.php/dav/files/USERNAME/", auth=(cred["cred_user"], cred["cred_secret"]), ) -with open("listing.txt", "wt") as urlfile: - writer = csv.writer(urlfile, delimiter="\t") +# Shared directory, contents of which should be listed +DIRNAME = "sharing/example" + +# List files in the shared directory, writing outputs to a csv file for addurls +with open("listing.csv", "wt") as urlfile: + writer = csv.writer(urlfile, delimiter=",") writer.writerow(["name", "href"]) - for dirpath, dirinfo, fileinfo in fs.walk("test-sharing/example2", detail=True): + for dirpath, dirinfo, fileinfo in fs.walk(DIRNAME, detail=True): # fileinfo is a dict, with file names as keys, # and dicts with actual file info as values; # we need path ({"name": "..."}) @@ -29,9 +36,6 @@ # reported path is relative to root of fs object, # what we need is relative to the directory that we 
walk - relpath = PurePosixPath(name).relative_to("test-sharing/example2") - - writer.writerow((relpath, href)) + relpath = PurePosixPath(name).relative_to(DIRNAME) -# uncurl pattern - may need to adjust slashes a little -# "(?Phttps://[^/]+)/(?Premote\.php/dav/files/[^/]+|public\.php/webdav)/(?Ptest-sharing/example2/)(?P.*)"gm + writer.writerow([relpath, href]) From 335b00e1807088fae635a29b8d763cdce6395707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Thu, 20 Jul 2023 17:10:32 +0200 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Stephan Heunis --- kbi/0028/index.rst | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst index 4867fb1..7fc8644 100644 --- a/kbi/0028/index.rst +++ b/kbi/0028/index.rst @@ -86,7 +86,7 @@ would start with: The share token (part of the share link) needs to be provided as username, and the (optional) share password as password. Note that -these are sent as request credentials, and are not included in the +these are sent as credentials in the http(s) request header, and are not included in the URL. URL pattern - summary @@ -113,7 +113,7 @@ where: Listing files ------------- -For generating the dataset, a list of file names (relative paths) and +For generating the dataset using the ``addurls`` command, a list of file names (relative paths) and their respective URLs is needed. These can be generated automatically, e.g. with the `webdav4`_ and `fsspec`_ Python libraries. @@ -122,7 +122,7 @@ An example script is given below, using inline comments for explanations. The example assumes that user's webdav credentials are already known to DataLad under the name ``webdav-mycred`` (if not, these can be added with ``datalad credentials add``, or provided to the script in a -different way). +different way, e.g. as environment variables). .. _webdav4: https://pypi.org/project/webdav4/ .. 
_fsspec: https://pypi.org/project/fsspec/ @@ -139,21 +139,25 @@ This would produce the following csv file: foo/file2.dat,/remote.php/dav/files/USERNAME/sharing/example/foo/file2.dat ... -Uncurl remote +Creating the dataset ------------- -Download URLs are handled by special remotes. The uncurl remote, -available in DataLad-next extension, provides both the ability to +In a DataLad dataset, the process of accessing files that were added via download URLs is handled by a `git-annex special remote`_. The uncurl remote, +available in the `DataLad-next`_ extension, provides both the ability to reconfigure URLs and access to DataLad-next's credential workflow. It -can be initialized as follows (optionally with ``autoenable=true``): +can be initialized as follows (optionally with ``autoenable=true``) inside a newly created and empty DataLad dataset: + +.. _git-annex special remote: https://git-annex.branchable.com/special_remotes/ +.. _DataLad-next: https://github.com/datalad/datalad-next .. code-block:: none git annex initremote uncurl type=external externaltype=uncurl encryption=none -With a known URL pattern (see above), a match expression can be -defined upfront. The regular expression below is relatively generic, -with only the dirpath being specific to the given example. Websites +With a known URL pattern (see above), a match expression for the uncurl special remote can be defined upfront. Defining a match expression allows us to isolate identifiers (such as ``dirpath``, ``filepath``, etc) in the URL pattern, which becomes particularly useful when URLs need to be transformed in future. + +The regular expression below is relatively generic, +with only the ``dirpath`` being specific to the given example. 
Websites like `regex101`_ can be helpful in building and understanding the expression: @@ -161,8 +165,7 @@ expression: git annex enableremote uncurl match="(?Phttps://[^/]+)/(?Premote\.php/dav/files/[^/]+|public\.php/webdav)/(?Psharing/example)/(?P.*)" -The dataset is created based on the previously generated tabular file -with ``datalad addurls``: +Finally, files are added to the dataset with ``datalad addurls`` using the previously generated csv file: .. code-block:: none @@ -174,8 +177,7 @@ Transforming URLs ----------------- Assuming the same user moves the folder in their Nextcloud account to -``some/other/place/``, the URL configuration can use all the defined -parts with only ``dirpath`` being different: +``some/other/place/``, access to the files in the same DataLad dataset can be retained by setting the URL template of the uncurl remote. The URL template has access to the same identifiers isolated previously with the match expression, and in the case of of this example can use these defined parts with only ``dirpath`` having to change: .. code-block:: none @@ -198,9 +200,9 @@ Regardless of whether the files are accessed via the ``remote.php/dav/files/USERNAME/`` or ``public.php/webdav`` path, the authentication realm for the given nextcloud instance is the same. This means users who already have DataLad credentials saved for -the given realm would be see their requests for password-protected +the given realm would see their requests for password-protected links refused. As long as ``get`` does not support explicit -credentials, this can be worked around by unsetting the credential +credentials, this can be circumvented by unsetting the credential realm. 
If a share link is not password protected, the webdav access via From 93f5c04a17c5d7574c53765196d1a767a3d2d8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Thu, 20 Jul 2023 17:30:58 +0200 Subject: [PATCH 5/7] Wording and formatting changes --- kbi/0028/index.rst | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst index 7fc8644..b731a51 100644 --- a/kbi/0028/index.rst +++ b/kbi/0028/index.rst @@ -25,9 +25,10 @@ Nextcloud without using DataLad. For publishing DataLad datasets to Nextcloud, see the documentation of DataLad-next's `create-sibling-webdav`_ command instead. -This document extends :ref:`KBI0007` in two areas: it introduces the -`uncurl`_ special remote for transforming URLs and using credentials, -and focuses on Nextcloud-specific URL patterns. +This document extends the ``addurls``-based approach described in +:ref:`KBI0007` in two areas: it introduces the `uncurl`_ special +remote for transforming URLs and using credentials, and focuses on +Nextcloud-specific URL patterns. .. _nextcloud: https://nextcloud.com/ .. _sciebo: https://hochschulcloud.nrw/ @@ -49,7 +50,7 @@ created by appending ``/download?path=&files=`` (where ``path`` is a relative path to a directory, and ``name`` is the file name). However, if the sharing link is password protected, such URL would not work, as it would redirect to a login page (html document) -and not to file content. +and not to the file content. In a general case (share links with or without password, as well as sharing with named users), `Nextcloud's webdav access`_ can be @@ -140,12 +141,14 @@ This would produce the following csv file: ... Creating the dataset -------------- +-------------------- -In a DataLad dataset, the process of accessing files that were added via download URLs is handled by a `git-annex special remote`_. 
The uncurl remote, -available in the `DataLad-next`_ extension, provides both the ability to -reconfigure URLs and access to DataLad-next's credential workflow. It -can be initialized as follows (optionally with ``autoenable=true``) inside a newly created and empty DataLad dataset: +In a DataLad dataset, the process of accessing files that were added +via download URLs is handled by a `git-annex special remote`_. The +uncurl remote, available in the `DataLad-next`_ extension, provides +both the ability to reconfigure URLs and the access to DataLad-next's +credential workflow. It can be initialized as follows (optionally with +``autoenable=true``) inside a DataLad dataset that has been created: .. _git-annex special remote: https://git-annex.branchable.com/special_remotes/ .. _DataLad-next: https://github.com/datalad/datalad-next @@ -177,7 +180,11 @@ Transforming URLs ----------------- Assuming the same user moves the folder in their Nextcloud account to -``some/other/place/``, access to the files in the same DataLad dataset can be retained by setting the URL template of the uncurl remote. The URL template has access to the same identifiers isolated previously with the match expression, and in the case of of this example can use these defined parts with only ``dirpath`` having to change: +``some/other/place/``, access to the files in the same DataLad dataset +can be retained by setting the URL template of the uncurl remote. The +URL template has access to the same identifiers isolated previously +with the match expression, and in the case of this example can use +these defined parts with only ``dirpath`` having to change: .. code-block:: none @@ -198,7 +205,7 @@ Credential caveats Regardless of whether the files are accessed via the ``remote.php/dav/files/USERNAME/`` or ``public.php/webdav`` path, the -authentication realm for the given nextcloud instance is the +authentication realm for the given Nextcloud instance is the same. 
This means users who already have DataLad credentials saved for the given realm would see their requests for password-protected links refused. As long as ``get`` does not support explicit From ebbf5d6e940c64cab0fceea5f70389133c508648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Fri, 21 Jul 2023 17:50:10 +0200 Subject: [PATCH 6/7] Fix code block to be consistent with previous ones --- kbi/0028/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst index b731a51..4e7b8bc 100644 --- a/kbi/0028/index.rst +++ b/kbi/0028/index.rst @@ -172,7 +172,7 @@ Finally, files are added to the dataset with ``datalad addurls`` using the previ .. code-block:: none - datalad addurls listing.txt https://example.com/nextcloud{href} {name} + datalad addurls listing.csv https://example.com/nextcloud{href} {name} .. _regex101: https://regex101.com From 50c4286cd0422225f2170e69e43adf55aee999d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Szczepanik?= Date: Fri, 21 Jul 2023 17:55:05 +0200 Subject: [PATCH 7/7] Add a comment about spaces in uncurl match expression Discovered that by accident today - registered URL would be urlquoted anyway, plus uncurl tries to split the match pattern into match patterns. --- kbi/0028/index.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kbi/0028/index.rst b/kbi/0028/index.rst index 4e7b8bc..b2a0cf4 100644 --- a/kbi/0028/index.rst +++ b/kbi/0028/index.rst @@ -159,10 +159,12 @@ credential workflow. It can be initialized as follows (optionally with With a known URL pattern (see above), a match expression for the uncurl special remote can be defined upfront. Defining a match expression allows us to isolate identifiers (such as ``dirpath``, ``filepath``, etc) in the URL pattern, which becomes particularly useful when URLs need to be transformed in future. 
-The regular expression below is relatively generic, -with only the ``dirpath`` being specific to the given example. Websites -like `regex101`_ can be helpful in building and understanding the -expression: +The regular expression below is relatively generic, with only the +``dirpath`` being given explicitly, and specific to the given +example. Note that if ``dirpath`` included spaces, they would have to +be `url-encoded`_; otherwise, the uncurl remote would split the +expression into two. Websites like `regex101`_ can be helpful in +building and understanding the expression: .. code-block:: none @@ -175,6 +177,7 @@ Finally, files are added to the dataset with ``datalad addurls`` using the previ datalad addurls listing.csv https://example.com/nextcloud{href} {name} .. _regex101: https://regex101.com +.. _url-encoded: https://www.w3schools.com/tags/ref_urlencode.asp Transforming URLs -----------------