Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some problems with Earthdata authorization. #2709

Merged
merged 8 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.9.3 - TBD

* Fix some problems with earthdata authorization and data access. See [Github #2709](https://github.com/Unidata/netcdf-c/pull/2709).
* Fix a race condition in some ncdump tests. See [Github #2682](https://github.com/Unidata/netcdf-c/pull/2682).
* Fix a minor bug in reporting the use of szip. See [Github #2679](https://github.com/Unidata/netcdf-c/pull/2679).
* Simplify the handling of XGetopt. See [Github #2678](https://github.com/Unidata/netcdf-c/pull/2678).
Expand Down
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2140,6 +2140,8 @@ AC_SUBST([ISCMAKE], [])

# Provide conditional to temporarily suppress tests and such
AM_CONDITIONAL([AX_IGNORE], [test xno = xyes])
# Provide conditional to identify tests that must be run manually
AM_CONDITIONAL([AX_MANUAL], [test xno = xyes])

AC_MSG_NOTICE([generating header files and makefiles])
AC_CONFIG_FILES(test_common.sh:test_common.in)
Expand Down
13 changes: 12 additions & 1 deletion dap4_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,19 @@ ENDIF()
IF(ENABLE_DAP_REMOTE_TESTS)
add_sh_test(dap4_test test_remote)
add_sh_test(dap4_test test_hyrax)
# add_sh_test(dap4_test test_thredds)
add_sh_test(dap4_test test_dap4url)
IF(RUN_MANUAL_TESTS)
# The following test can only be run by hand.
# It tests earthdata authorization.
# Before running it, one needs to do the following:
# export USERPWD="<username>:<password>"
# where the username and password are registered
# with urs.earthdata.nasa.gov.
add_sh_test(dap4_test test_earthdata)
ENDIF()
IF(RUN_IGNORED_TESTS)
add_sh_test(dap4_test test_thredds)
ENDIF()
ENDIF(ENABLE_DAP_REMOTE_TESTS)

ENDIF(ENABLE_TESTS)
Expand Down
20 changes: 16 additions & 4 deletions dap4_test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,21 @@ if ENABLE_DAP_REMOTE_TESTS
TESTS += test_remote.sh
TESTS += test_constraints.sh
TESTS += test_hyrax.sh
#TESTS += test_thredds.sh
TESTS += test_dap4url.sh

# The following test can only be run by hand.
# It tests earthdata authorization.
# Before running it, one needs to do the following:
# export USERPWD="<username>:<password>"
# where the username and password are registered with urs.earthdata.nasa.gov.
if AX_MANUAL
TESTS += test_earthdata.sh
endif

# test_thredds.sh is a temporarily suppressed test, not a manual one,
# so guard it with AX_IGNORE (see configure.ac) rather than AX_MANUAL.
# This matches the RUN_IGNORED_TESTS guard used in CMakeLists.txt.
if AX_IGNORE
TESTS += test_thredds.sh
endif

endif
endif

Expand All @@ -59,7 +71,7 @@ EXTRA_DIST = CMakeLists.txt test_common.h build.sh \
test_curlopt.sh test_data.sh test_hyrax.sh test_meta.sh \
test_parse.sh test_raw.sh \
test_remote.sh test_constraints.sh test_thredds.sh \
test_dap4url.sh \
test_dap4url.sh test_earthdata.sh \
cdltestfiles rawtestfiles \
baseline baselineraw baselineremote baselinehyrax baselinethredds

Expand All @@ -81,8 +93,8 @@ clean-local: clean-local-check
.PHONY: clean-local-check

clean-local-check:
-rm -rf results results_test_*
-rm -f .dodsrc .daprc
-rm -rf results results_*
-rm -f .dodsrc .daprc .ncrc .netrc

# The shell file maketests.sh is used to build the testdata
# for dap4 testing. It creates and fills the directories
Expand Down
36 changes: 36 additions & 0 deletions dap4_test/test_earthdata.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/sh

# Default srcdir to the current directory when the caller did not set it.
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
# Load the shared test definitions; isolate and NCDUMP are not defined in
# this script, so they presumably come from here (confirm in test_common.sh).
. ../test_common.sh

# Stop at the first failing command.
set -e

# Work inside a dedicated results directory. isolate is expected to set
# ISOPATH (it is defined outside this script -- confirm in test_common.sh).
isolate "results_earthdata_dap4"
# Remember the starting directory before moving into ISOPATH.
THISDIR=`pwd`
cd $ISOPATH

# This shell script tests reading of
# publicly accessible DAP4 datasets on earthdata

# Setup
# The credentials come from the environment variable USERPWD="username:password".
# They are written to a .netrc file, and a .ncrc file is created that points
# at that .netrc and at a cookie jar, all located under ISOPATH.
if test "x$USERPWD" = x ; then
    echo "USERPWD must be set to \"<username>:<password>\" before running this test" 1>&2
    exit 1
fi
# Do not store the password in PWD: that variable is maintained by the shell
# for cd/pwd and is normally exported to every child process.
# -f2- keeps everything after the first ':' so the password may contain colons.
URSUSER=`echo "$USERPWD" | cut -d':' -f1 | tr -d '\n\r'`
URSPASSWD=`echo "$USERPWD" | cut -d':' -f2- | tr -d '\n\r'`
rm -fr ./.netrc ./.ncrc ./urscookies
echo "machine urs.earthdata.nasa.gov login $URSUSER password $URSPASSWD" > ./.netrc
echo "HTTP.COOKIEJAR=${ISOPATH}/urscookies" > ./.ncrc
echo "HTTP.NETRC=${ISOPATH}/.netrc" >> ./.ncrc

# Whitespace-separated list of test cases, each of the form "<name>;<url>".
TESTCASES="opendap1;dap4://opendap.earthdata.nasa.gov/collections/C2532426483-ORNL_CLOUD/granules/Daymet_Daily_V4R1.daymet_v4_daily_hi_tmax_2010.nc"

# Run a single test case.
# $1 has the form "<name>;<url>". ncdump -h is run against <url> with <name>
# passed via -n, and the output is written to tmp_<name>.cdl in the
# current directory. Under "set -e" a failing ncdump aborts the script.
testcase() {
    NM=`echo "$1" | cut -d';' -f1`
    # -f2- keeps the whole URL even if it contains a ';'.
    URL=`echo "$1" | cut -d';' -f2-`
    echo "*** Test: $NM = $URL"
    rm -f "tmp_${NM}.cdl"
    # Quote the arguments so the shell neither splits nor glob-expands
    # the URL (DAP URLs may contain '?', '[', ']' or '*').
    ${NCDUMP} -h -n "$NM" "$URL" > "tmp_${NM}.cdl"
}

# Run every entry of TESTCASES. The list is expanded unquoted, so the shell
# splits it on whitespace into individual "<name>;<url>" entries.
for t in $TESTCASES ; do
testcase "$t"
done
101 changes: 50 additions & 51 deletions docs/auth.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,14 @@ NetCDF Authorization Support {#auth}
netCDF can support user authorization using the facilities provided by the curl
library. This includes basic password authentication as well as
certificate-based authorization.

At the moment, this document only applies to DAP2 and DAP4 access
because they are (for now) the only parts of the netCDF-C library
that uses libcurl.
At the moment, this document only applies to DAP2 and DAP4 access.

With some exceptions (e.g. see the section on <a href="#REDIR">redirection</a>),
the libcurl authorization mechanisms can be accessed in two ways:

1. Inserting the username and password into the url, or
2. Accessing information from a so-called _rc_ file named either
`.ncrc` or `.dodsrc`. The latter is deprecated, but will be supported indefinitely.
`.ncrc` or `.dodsrc`. The latter is historical and deprecated, but will be supported indefinitely.

## URL-Based Authentication {#auth_url}

Expand Down Expand Up @@ -52,29 +49,18 @@ Locating the _rc_ file is a multi-step process.

### Search Order

The file must be called one of the following names:
".daprc" or ".dodsrc".
If both ".daprc" and ".dodsrc" exist, then
the ".daprc" file will take precedence.

It is strongly suggested that you pick one of the two names
and use it always. Otherwise you may observe unexpected results
when the netcdf-c library finds one that you did not intend.
The netcdf-c library searches for, and loads from, the following files,
in this order:
1. $HOME/.ncrc
2. $HOME/.dodsrc
3. $CWD/.ncrc
4. $CWD/.dodsrc

The search for an _rc_ file looks in the following places in this order.
*$HOME* is the user's home directory and *$CWD* is the current working directory. Entries in later files override those in earlier files.

1. Check for the environment variable named _DAPRCFILE_.
This will specify the full path for the _rc_ file
(not just the containing directory).
2. Search the current working directory (`./`) looking
for (in order) .daprc or .dodsrc.
3. Search the HOME directory (`$HOME`) looking
for (in order) .daprc or .dodsrc. The HOME environment
variable is used to define the directory in which to search.

It is strongly suggested that you pick a uniform location
and use it always. Otherwise you may observe unexpected results
when the netcdf-c library get an rc file you did not expect.
It is strongly suggested that you pick a uniform location and a uniform name
and use them always. Otherwise you may observe unexpected results
when the netcdf-c library loads an rc file you did not expect.

### RC File Format

Expand All @@ -86,38 +72,34 @@ where the bracket-enclosed host:port is optional.

### URL Constrained RC File Entries

Each line of the rc file can begin with
a host+port enclosed in square brackets.
The form is "host:port".
If the port is not specified
then the form is just "host".
The reason that more of the url is not used is that
libcurl's authorization grain is not any finer than host level.

Examples.
Each line of the rc file can begin with a host+port enclosed in
square brackets. The form is "host:port". If the port is not
specified then the form is just "host". The reason that more of
the url is not used is that libcurl's authorization grain is not
any finer than host level.

Here are some examples.
````
[remotetest.unidata.ucar.edu]HTTP.VERBOSE=1

or

[fake.ucar.edu:9090]HTTP.VERBOSE=0

````
If the url request from, say, the _netcdf_open_ method
has a host+port matching one of the prefixes in the rc file, then
the corresponding entry will be used, otherwise ignored.
This means that an entry with a matching host+port will take
precedence over an entry without a host+port.

For example, the URL

````
http://remotetest.unidata.ucar.edu/thredds/dodsC/testdata/testData.nc

````
will have HTTP.VERBOSE set to 1 because its host matches the example above.

Similarly,

````
http://fake.ucar.edu:9090/dts/test.01

````
will have HTTP.VERBOSE set to 0 because its host+port matches the example above.

## Authorization-Related Keys {#auth_keys}
Expand All @@ -128,7 +110,7 @@ The second column is the affected curl_easy_setopt option(s), if any
<table>
<tr><th>Key</th><th>Affected curl_easy_setopt Options</th><th>Notes</th>
<tr><td>HTTP.COOKIEJAR</td><td>CURLOPT_COOKIEJAR</td>
<tr><td>HTTP.COOKIEFILE</td><td>CURLOPT_COOKIEJAR</td><td>Alias for CURLOPT_COOKIEJAR</td>
<tr><td>HTTP.COOKIEFILE</td><td>CURLOPT_COOKIEJAR</td><td>COOKIEJAR and COOKIEFILE are considered aliases, so setting one will set the other as well.</td>
<tr><td>HTTP.PROXY.SERVER</td><td>CURLOPT_PROXY, CURLOPT_PROXYPORT, CURLOPT_PROXYUSERPWD</td>
<tr><td>HTTP.PROXY_SERVER</td><td>CURLOPT_PROXY, CURLOPT_PROXYPORT, CURLOPT_PROXYUSERPWD</td><td>Deprecated: use HTTP.PROXY.SERVER</td>
<tr><td>HTTP.SSL.CERTIFICATE</td><td>CURLOPT_SSLCERT</td>
Expand All @@ -141,7 +123,7 @@ The second column is the affected curl_easy_setopt option(s), if any
<tr><td>HTTP.CREDENTIALS.USERPASSWORD</td><td>CURLOPT_USERPWD</td>
<tr><td>HTTP.CREDENTIALS.USERNAME</td><td>CURLOPT_USERNAME</td>
<tr><td>HTTP.CREDENTIALS.PASSWORD</td><td>CURLOPT_PASSWORD</td>
<tr><td>HTTP.NETRC</td><td>N.A.</td><td>Specify path of the .netrc file</td>
<tr><td>HTTP.NETRC</td><td>CURLOPT_NETRC, CURLOPT_NETRC_FILE</td><td>Specifies the path of the .netrc file and enables its use.</td>
<tr><td>AWS.PROFILE</td><td>N.A.</td><td>Specify the name of a profile from the .aws/credentials file</td>
<tr><td>AWS.REGION</td><td>N.A.</td><td>Specify name of a default region</td>
</table>
Expand Down Expand Up @@ -188,7 +170,7 @@ HTTP.SSL.KEY is essentially the same as HTTP.SSL.CERTIFICATE
and should always have the same value.

HTTP.SSL.KEYPASSWORD
specifies the password for accessing the HTTP.SSL.CERTIFICAT/HTTP.SSL.key file.
specifies the password for accessing the HTTP.SSL.CERTIFICATE/HTTP.SSL.KEY file.

HTTP.SSL.CAPATH
specifies the path to a directory containing
Expand All @@ -207,9 +189,10 @@ HTTP.PROXY_SERVER
deprecated; use HTTP.PROXY.SERVER

HTTP.NETRC
specifies the absolute path of the .netrc file.
specifies the absolute path of the .netrc file,
and causes it to be used instead of username and password.
See [redirection authorization](#REDIR)
for information about using .netrc.
for information about using *.netrc*.

## Password Escaping {#auth_userpwdescape}

Expand Down Expand Up @@ -252,12 +235,12 @@ using the _https_ protocol (note the use of _https_ instead of _http_).
the client back to the SOI to actually obtain the data.

It turns out that libcurl, by default, uses the password in the
`.daprc` file (or from the url) for all connections that request
`.ncrc` file (or from the url) for all connections that request
a password. This causes problems because only the specific
redirected connection is the one that actually requires the password.
This is where the `.netrc` file comes in. Libcurl will use `.netrc`
for the redirected connection. It is possible to cause libcurl
to use the `.daprc` password always, but this introduces a
to use the `.ncrc` password always, but this introduces a
security hole because it may send the initial user+pwd to every
server in the redirection chain.
In summary, if you are using redirection, then you are
Expand All @@ -274,9 +257,9 @@ which the client is redirected for authorization, and the
login and password are those needed to authenticate on that machine.

The location of the `.netrc` file can be specified by
putting the following line in your `.daprc`/`.dodsrc` file.
putting the following line in your `.ncrc`/`.dodsrc` file.

HTTP.NETRC=<path to netrc file>
HTTP.NETRC=<path to .netrc file>

If not specified, then libcurl will look first in the current
directory, and then in the HOME directory.
Expand All @@ -286,6 +269,22 @@ to specify a real file in the file system to act as the
cookie jar file (HTTP.COOKIEJAR) so that the
redirect site can properly pass back authorization information.

### Accessing *earthdata.nasa.gov*

Since it is so common, here is a set of templates to use to
access *earthdata.nasa.gov*.

#### *.ncrc* File
````
HTTP.NETRC=/home/<user>/.netrc
HTTP.COOKIEJAR=/home/<user>/.urs_cookies
````

#### *.netrc* File
````
machine urs.earthdata.nasa.gov login <user> password <password>
````

## Client-Side Certificates {#auth_clientcerts}

Some systems, notably ESG (Earth System Grid), require
Expand Down
28 changes: 12 additions & 16 deletions libdispatch/drc.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ See COPYRIGHT for license information.
#include "nc4internal.h"
#include "ncdispatch.h"

#ifndef nulldup
#define nulldup(x) ((x)?strdup(x):(x))
#endif

#undef NOREAD

#undef DRCDEBUG
Expand Down Expand Up @@ -66,7 +62,7 @@ static void freeprofile(struct AWSprofile* profile);
static void freeprofilelist(NClist* profiles);

/* Define default rc files and aliases, also defines load order*/
static const char* rcfilenames[] = {".ncrc", ".daprc", ".dodsrc",NULL};
static const char* rcfilenames[] = {".ncrc", ".daprc", ".dodsrc", NULL};

/* Read these files in order and later overriding earlier */
static const char* awsconfigfiles[] = {".aws/config",".aws/credentials",NULL};
Expand Down Expand Up @@ -97,9 +93,11 @@ nc_rc_get(const char* key)

ncg = NC_getglobalstate();
assert(ncg != NULL && ncg->rcinfo != NULL && ncg->rcinfo->entries != NULL);
if(ncg->rcinfo->ignore) return NC_NOERR;
if(ncg->rcinfo->ignore) goto done;
value = NC_rclookup(key,NULL,NULL);
return nulldup(value);
done:
value = nulldup(value);
return value;
}

/**
Expand All @@ -121,8 +119,9 @@ nc_rc_set(const char* key, const char* value)

ncg = NC_getglobalstate();
assert(ncg != NULL && ncg->rcinfo != NULL && ncg->rcinfo->entries != NULL);
if(ncg->rcinfo->ignore) return NC_NOERR;
if(ncg->rcinfo->ignore) goto done;;
stat = NC_rcfile_insert(key,NULL,NULL,value);
done:
return stat;
}

Expand All @@ -132,7 +131,6 @@ nc_rc_set(const char* key, const char* value)
/*
Initialize defaults and load:
* .ncrc
* .daprc
* .dodsrc
* ${HOME}/.aws/config
* ${HOME}/.aws/credentials
Expand Down Expand Up @@ -235,13 +233,11 @@ NC_rcload(void)
/* locate the configuration files in order of use:
1. Specified by NCRCENV_RC environment variable.
2. If NCRCENV_RC is not set then merge the set of rc files in this order:
1. $RCHOME/.ncrc
2. $RCHOME/.daprc
3. $RCHOME/.docsrc
4. $CWD/.ncrc
5. $CWD/.daprc
6. $CWD/.docsrc
Entry in later files override any of the earlier files
1. $HOME/.ncrc
2. $HOME/.dodsrc
3. $CWD/.ncrc
4. $CWD/.dodsrc
Entries in later files override any of the earlier files
*/
if(globalstate->rcinfo->rcfile != NULL) { /* always use this */
nclistpush(rcfileorder,strdup(globalstate->rcinfo->rcfile));
Expand Down
Loading