From 8c0378fb30f15b06be1e3118cb8f0b537c9e4149 Mon Sep 17 00:00:00 2001 From: zbruick Date: Thu, 27 Jun 2019 12:09:42 -0600 Subject: [PATCH 1/2] Adds a module to convert pandas dataframes to netCDF files. Intended for conversion to DSG netCDF format; if not enough parameters are set by the user, a non-CF-compliant netCDF file will still be written, with a warning issued. This should work for time series, profiles, or trajectories, with main testing done against METAR dataframes. --- metpy/io/pandas_to_netcdf.py | 172 ++++++++++++++++++++++++ metpy/io/tests/test_pandas_to_netcdf.py | 95 +++++++++++++ src/metpy/io/__init__.py | 15 ++- 3 files changed, 277 insertions(+), 5 deletions(-) create mode 100644 metpy/io/pandas_to_netcdf.py create mode 100644 metpy/io/tests/test_pandas_to_netcdf.py diff --git a/metpy/io/pandas_to_netcdf.py b/metpy/io/pandas_to_netcdf.py new file mode 100644 index 00000000000..d17a6a6b801 --- /dev/null +++ b/metpy/io/pandas_to_netcdf.py @@ -0,0 +1,172 @@ +# Copyright (c) 2019 MetPy Developers. +# Distributed under the terms of the BSD 3-Clause License. +# SPDX-License-Identifier: BSD-3-Clause +"""Support writing a pandas dataframe to a DSG netCDF file.""" + +import logging +from os import path + +from numpy import arange +import xarray as xr + +from ..package_tools import Exporter + +exporter = Exporter(globals()) + +log = logging.getLogger(__name__) + + +@exporter.export +def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netcdf_format=None, + column_units=None, standard_names=None, long_names=None, + dataset_type=None): + r"""Take a Pandas DataFrame and convert it to a netCDF file. + + If given a Pandas DataFrame, this function will first convert + it to an xarray Dataset, attach attributes and metadata to it as + provided by the user, and then save it as a CF-compliant discrete + sampling geometry (DSG) netCDF file. Assumes each row of the DataFrame + is a unique observation. + + This function is ideal for point data, such as station observations, + or for trajectory or profile data, which is discretely sampled at + individual points. + + Parameters + ---------- + df : `pandas.DataFrame` + Point data in pandas dataframe. + + sampling_var : str + Column name that is the sampling dimension: for surface observations, + this is the column that contains the station identifier/name. + + sampling_data_vars : list + List of all variables associated with the sampling variable that do not + vary with time, such as latitude, longitude, and elevation for + surface observations. + + path_to_save : str + Path, including filename, for where to save netCDF file. + + netcdf_format : str, optional + NetCDF format to write, passed through to `xarray.Dataset.to_netcdf` + (e.g. 'NETCDF4' or 'NETCDF3_CLASSIC'); if not given, xarray chooses + a default based on the available backends. + + column_units : dict, optional + Dictionary of units to attach to columns of the dataframe. Overrides + the units attribute if it is attached to the dataframe. + + standard_names : dict, optional + Dictionary of variable descriptions that are CF-compliant. + + long_names : dict, optional + Dictionary of longer variable descriptions that provide more detail + than standard_names. + + dataset_type : str, optional + Type of dataset to be converted. Options are 'timeSeries', 'profile', + or 'trajectory'. While optional, this variable should be declared to create + a CF-compliant DSG netCDF file. + + Returns + ------- + NetCDF file saved to `path_to_save`.
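+ + Examples + -------- + A minimal usage sketch, mirroring the accompanying tests (the DataFrame + contents and the output filename are illustrative only): + + >>> import pandas as pd + >>> df = pd.DataFrame({'temperature': [1, 2, 2, 3], + ... 'latitude': [4, 5, 6, 7], + ... 'longitude': [1, 2, 3, 4], + ... 'station_id': ['KFNL', 'KDEN', 'KVPZ', 'KORD']}) + >>> dataframe_to_netcdf(df, sampling_var='station_id', + ... sampling_data_vars=['station_id', 'latitude', 'longitude'], + ... path_to_save='stations.nc', dataset_type='timeSeries')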
+ + """ + # Verify_integrity must be true in order for conversion to netCDF to work + # Return a TypeError if not provided a Pandas DataFrame + try: + # Create the dimensions for use later in netCDF file + samplingindex = df.groupby([sampling_var], sort=False).ngroup() + obs = arange(0, len(df)) + df.insert(0, 'samplingIndex', samplingindex) + df.insert(1, 'observations', obs) + + # Handle the sampling location specific data + sampling_data = df[sampling_data_vars] + samples = sampling_data.groupby([sampling_var], sort=False).ngroup() + sampling_data.insert(0, 'samples', samples) + sampling_data = sampling_data.groupby('samples').first() + dataset_samples = xr.Dataset.from_dataframe(sampling_data) + + # Create the dataset for the variables of each observation + df = df.drop(sampling_data_vars, axis=1) + df = df.set_index(['observations'], verify_integrity=True) + dataset_var = xr.Dataset.from_dataframe(df) + + # Merge the two datasets together + dataset_final = xr.merge([dataset_samples, dataset_var], compat='no_conflicts') + + except (AttributeError, ValueError, TypeError): + raise TypeError('A pandas dataframe was not provided') + + # Attach variable-specific metadata + _assign_metadata(dataset_final, column_units, standard_names, long_names) + + # Attach dataset-specific metadata + if dataset_type: + dataset_final.attrs['featureType'] = dataset_type + else: + log.warning('No dataset type provided - netCDF will not have appropriate metadata' + 'for a DSG dataset.') + if dataset_type: + dataset_final[sampling_var].attrs['cf_role'] = dataset_type.lower() + '_id' + dataset_final['samplingIndex'].attrs['instance_dimension'] = 'samples' + + # Determine mode to write to netCDF + write_mode = 'w' + if path.exists(path_to_save): + # Eventually switch to 'a' to allow appending and delete error + raise ValueError('File already exists - please delete and run again') + + # Check if netCDF4 is installed to see how many unlimited dimensions we can use + # Need conditional import for checking due to Python 2 + try: + from importlib.util import find_spec + check_netcdf4 = find_spec('netCDF4') + except ImportError: + from imp import find_module + check_netcdf4 = find_module('netCDF4') + + # Make sure path is a string to allow netCDF4 to be used - needed for tests to pass + path_to_save = str(path_to_save) + + if check_netcdf4 is not None: + unlimited_dimensions = ['samples', 'observations'] + else: + # Due to xarray's fallback to scipy if netCDF4-python is not installed + # only one dimension can be unlimited. This may cause issues for users + log.warning('NetCDF4 not installed - saving as a netCDF3 file with only the' + 'observations dimension as unlimited. 
If netCDF4 or multiple' + 'dimensions are desired, run `pip install netCDF4`') + unlimited_dimensions = ['observations'] + + # Convert to netCDF + dataset_final.to_netcdf(path=path_to_save, mode=write_mode, format=netcdf_format, + unlimited_dims=unlimited_dimensions, compute=True) + + +def _assign_metadata(dataset, units_dict, standard_names_dict, long_names_dict): + if units_dict is not None: + final_column_units = {} + final_column_units['samples'] = '' + final_column_units['observations'] = '' + final_column_units['samplingIndex'] = '' + final_column_units.update(units_dict) + for var in dataset.variables: + dataset[var].attrs['units'] = final_column_units[var] + if standard_names_dict is not None: + final_std_names = {} + final_std_names['samples'] = '' + final_std_names['observations'] = '' + final_std_names['samplingIndex'] = '' + final_std_names.update(standard_names_dict) + for var in dataset.variables: + dataset[var].attrs['standard_name'] = final_std_names[var] + if long_names_dict is not None: + final_long_names = {} + final_long_names['samples'] = 'Sampling dimension' + final_long_names['observations'] = 'Observation dimension' + final_long_names['samplingIndex'] = 'Index of station for this observation' + final_long_names.update(long_names_dict) + for var in dataset.variables: + dataset[var].attrs['long_name'] = final_long_names[var] diff --git a/metpy/io/tests/test_pandas_to_netcdf.py b/metpy/io/tests/test_pandas_to_netcdf.py new file mode 100644 index 00000000000..6042079e997 --- /dev/null +++ b/metpy/io/tests/test_pandas_to_netcdf.py @@ -0,0 +1,95 @@ +# Copyright (c) 2019 MetPy Developers. +# Distributed under the terms of the BSD 3-Clause License. +# SPDX-License-Identifier: BSD-3-Clause +"""Test the `pandas_to_netcdf` module.""" + +import logging +import os + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from metpy.cbook import get_test_data +from metpy.io import dataframe_to_netcdf + +# Turn off the warnings for tests +logging.getLogger('metpy.io.pandas_to_netcdf').setLevel(logging.CRITICAL) + + +@pytest.fixture +def test_df(): + """Create generic dataframe for testing.""" + return pd.DataFrame({ + 'temperature': pd.Series([1, 2, 2, 3]), 'pressure': pd.Series([1, 2, 2, 3]), + 'latitude': pd.Series([4, 5, 6, 7]), 'longitude': pd.Series([1, 2, 3, 4]), + 'station_id': pd.Series(['KFNL', 'KDEN', 'KVPZ', 'KORD'])}) + + +def test_dataframe_to_netcdf_basic(tmpdir): + """Test dataframe conversion to netcdf.""" + df = pd.read_csv(get_test_data('station_data.txt'), usecols=[0, 1, 2, 3, 4, 5]) + df = df.rename(columns={'latitude[unit="degrees_north"]': 'latitude', + 'longitude[unit="degrees_east"]': 'longitude', + 'air_pressure_at_sea_level[unit="hectoPascal"]': + 'mean_sea_level_pressure', + 'air_temperature[unit="Celsius"]': 'temperature'}) + dataframe_to_netcdf(df, path_to_save=str(tmpdir) + '/test.nc', sampling_var='station', + sampling_data_vars=['station', 'latitude', 'longitude']) + assert os.path.exists(str(tmpdir) + '/test.nc') + data = xr.open_dataset(str(tmpdir) + '/test.nc') + assert np.max(data['temperature']) == 27 + + +def test_dataframe_to_netcdf_units(tmpdir): + """Test units attached via a dictionary.""" + df = pd.read_csv(get_test_data('station_data.txt'), usecols=[0, 1, 2, 3, 4, 5]) + df = df.rename(columns={'latitude[unit="degrees_north"]': 'latitude', + 'longitude[unit="degrees_east"]': 'longitude', + 'air_pressure_at_sea_level[unit="hectoPascal"]': + 'mean_sea_level_pressure', + 'air_temperature[unit="Celsius"]': 
'temperature'}) + col_units = {'samples': '', 'observations': '', 'samplingIndex': '', 'station': '', + 'latitude': 'degrees', 'longitude': 'degrees', 'temperature': 'degC', + 'mean_sea_level_pressure': 'hPa', 'time': ''} + dataframe_to_netcdf(df, path_to_save=str(tmpdir) + '/test.nc', sampling_var='station', + sampling_data_vars=['station', 'latitude', 'longitude'], + column_units=col_units, dataset_type='timeSeries') + data = xr.open_dataset(str(tmpdir) + '/test.nc') + assert data['station'].attrs['cf_role'] == 'timeseries_id' + assert data['temperature'].attrs['units'] == 'degC' + + +def test_dataframe_to_netcdf_names(test_df, tmpdir): + """Test attachment of standard names via a dictionary.""" + long_names = {'temperature': '2-meter air temperature', + 'pressure': 'Mean sea-level air pressure', 'latitude': 'Station latitude', + 'longitude': 'Station longitude', 'station_id': 'Station identifier'} + standard_names = {'temperature': 'air_temperature', + 'pressure': 'air_pressure_at_mean_sea_level', 'latitude': 'latitude', + 'longitude': 'longitude', 'station_id': 'platform_id'} + dataframe_to_netcdf(test_df, path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station_id', + sampling_data_vars=['station_id', 'latitude', 'longitude'], + standard_names=standard_names, long_names=long_names) + data = xr.open_dataset(str(tmpdir) + '/test.nc') + assert data['temperature'].attrs['standard_name'] == 'air_temperature' + assert data['station_id'].attrs['long_name'] == 'Station identifier' + + +def test_no_dataframe(tmpdir): + """Test error message if Pandas DataFrame is not provided.""" + array = np.arange(0, 10) + with pytest.raises(TypeError, match='A pandas dataframe was not provided'): + dataframe_to_netcdf(array, path_to_save=str(tmpdir) + '/test.nc', sampling_var=None, + sampling_data_vars=None) + + +def test_file_exists(test_df, tmpdir): + """Test error message if netCDF file already exists.""" + open(str(tmpdir) + '/test.nc', 'wb') + with pytest.raises(ValueError, match='File already exists - please delete and run again'): + dataframe_to_netcdf(test_df, path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station_id', + sampling_data_vars=['station_id', 'latitude', 'longitude']) diff --git a/src/metpy/io/__init__.py b/src/metpy/io/__init__.py index 5772e516c21..85e2c988462 100644 --- a/src/metpy/io/__init__.py +++ b/src/metpy/io/__init__.py @@ -1,15 +1,20 @@ -# Copyright (c) 2015,2016,2018 MetPy Developers. +# Copyright (c) 2015,2016,2018,2019 MetPy Developers. # Distributed under the terms of the BSD 3-Clause License. # SPDX-License-Identifier: BSD-3-Clause -"""Classes for reading various file formats. +"""Classes for reading and writing various file formats. -These classes are written to take both file names (for local files) or file-like objects; -this allows reading files that are already in memory (using :class:`python:io.StringIO`) -or remote files (using :func:`~python:urllib.request.urlopen`). +The gini and nexrad classes are written to take either file names (for local files) +or file-like objects; this allows reading files that are already in memory (using +:class:`python:io.StringIO`) or remote files (using :func:`~python:urllib.request.urlopen`). + +The `dataframe_to_netcdf` function takes a pandas dataframe and writes a netCDF file (in DSG +format if applicable).
""" from .gini import * # noqa: F403 from .nexrad import * # noqa: F403 +from .pandas_to_netcdf import * # noqa: F403 __all__ = gini.__all__[:] # pylint: disable=undefined-variable __all__.extend(nexrad.__all__) # pylint: disable=undefined-variable +__all__.extend(pandas_to_netcdf.__all__) # pylint: disable=undefined-variable From 4dad48f6821b939752facb17b7a44b7b95e51068 Mon Sep 17 00:00:00 2001 From: zbruick Date: Thu, 15 Aug 2019 14:53:28 -0600 Subject: [PATCH 2/2] Add appending capability to pandas_to_netcdf --- {metpy => src/metpy}/io/pandas_to_netcdf.py | 105 +++++++++++++----- .../io}/test_pandas_to_netcdf.py | 75 ++++++++++--- 2 files changed, 138 insertions(+), 42 deletions(-) rename {metpy => src/metpy}/io/pandas_to_netcdf.py (62%) rename {metpy/io/tests => tests/io}/test_pandas_to_netcdf.py (51%) diff --git a/metpy/io/pandas_to_netcdf.py b/src/metpy/io/pandas_to_netcdf.py similarity index 62% rename from metpy/io/pandas_to_netcdf.py rename to src/metpy/io/pandas_to_netcdf.py index d17a6a6b801..562e2f341cc 100644 --- a/metpy/io/pandas_to_netcdf.py +++ b/src/metpy/io/pandas_to_netcdf.py @@ -4,9 +4,10 @@ """Support reading a pandas dataframe to a DSG netCDF.""" import logging -from os import path +import os from numpy import arange +import pandas as pd import xarray as xr from ..package_tools import Exporter @@ -17,9 +18,9 @@ @exporter.export -def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netcdf_format=None, - column_units=None, standard_names=None, long_names=None, - dataset_type=None): +def dataframe_to_netcdf(df, mode, sampling_var, sampling_data_vars, path_to_save, + netcdf_format=None, column_units=None, standard_names=None, + long_names=None, dataset_type=None): r"""Take a Pandas DataFrame and convert it to a netCDF file. If given a Pandas DataFrame, this function will first convert @@ -37,6 +38,11 @@ def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netc df : `pandas.DataFrame` Point data in pandas dataframe. + mode : str + Specify whether to write ('w') a new netCDF file or append ('a') to an existing file. + If 'w' is specified and the `path_to_save` already exists, the file will be + overwritten. + sampling_var : str Column name that is the sampling dimension: for surface observations, this is the column that contains the station identifier/name @@ -71,6 +77,29 @@ def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netc ------- NetCDF file saved to `path_to_save`. + Notes + ----- + If append mode is used, all metadata will be preserved, but will be overwritten by + user input. + + """ + if mode == 'w': + _write_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, + netcdf_format, column_units, standard_names, long_names, + dataset_type) + elif mode == 'a': + _append_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, + netcdf_format, column_units, standard_names, long_names, + dataset_type) + else: + raise ValueError('Mode must either be "w" or "a".') + + +def _write_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netcdf_format, + column_units, standard_names, long_names, dataset_type): + """Write Pandas DataFrame to netCDF file. + + This will overwrite any existing file at `path_to_save`. 
""" # Verify_integrity must be true in order for conversion to netCDF to work # Return a TypeError if not provided a Pandas DataFrame @@ -105,18 +134,15 @@ def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netc # Attach dataset-specific metadata if dataset_type: dataset_final.attrs['featureType'] = dataset_type + dataset_final[sampling_var].attrs['cf_role'] = dataset_type.lower() + '_id' else: log.warning('No dataset type provided - netCDF will not have appropriate metadata' 'for a DSG dataset.') - if dataset_type: - dataset_final[sampling_var].attrs['cf_role'] = dataset_type.lower() + '_id' dataset_final['samplingIndex'].attrs['instance_dimension'] = 'samples' - # Determine mode to write to netCDF - write_mode = 'w' - if path.exists(path_to_save): - # Eventually switch to 'a' to allow appending and delete error - raise ValueError('File already exists - please delete and run again') + # Remove any existing file + if os.path.exists(str(path_to_save)): + os.remove(str(path_to_save)) # Check if netCDF4 is installed to see how many unlimited dimensions we can use # Need conditional import for checking due to Python 2 @@ -127,9 +153,6 @@ def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netc from imp import find_module check_netcdf4 = find_module('netCDF4') - # Make sure path is a string to allow netCDF4 to be used - needed for tests to pass - path_to_save = str(path_to_save) - if check_netcdf4 is not None: unlimited_dimensions = ['samples', 'observations'] else: @@ -141,27 +164,52 @@ def dataframe_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, netc unlimited_dimensions = ['observations'] # Convert to netCDF - dataset_final.to_netcdf(path=path_to_save, mode=write_mode, format=netcdf_format, + dataset_final.to_netcdf(path=str(path_to_save), mode='w', format=netcdf_format, unlimited_dims=unlimited_dimensions, compute=True) +def _append_to_netcdf(df, sampling_var, sampling_data_vars, path_to_save, + netcdf_format, column_units, standard_names, long_names, dataset_type): + """Append to existing netCDF file.""" + ds = xr.open_dataset(str(path_to_save)) + df_old = (ds.to_dataframe().reset_index() + .drop(columns=['samplingIndex', 'observations', 'samples'])) + df_new = pd.concat([df_old, df], sort=False).reset_index(drop=True) # Pandas dependency + + # Assign metadata here + if dataset_type is None and 'featureType' in ds.attrs: + dataset_type = ds.attrs['featureType'] + append_column_units = {} + append_standard_names = {} + append_long_names = {} + for var_name, da in ds.data_vars.items(): + if 'units' in da.attrs: + append_column_units[var_name] = da.attrs['units'] + if 'standard_name' in da.attrs: + append_standard_names[var_name] = da.attrs['standard_name'] + if 'long_name' in da.attrs: + append_long_names[var_name] = da.attrs['long_name'] + if column_units is not None: + append_column_units.update(column_units) + if standard_names is not None: + append_standard_names.update(standard_names) + if long_names is not None: + append_long_names.update(long_names) + + _write_to_netcdf(df_new, sampling_var, sampling_data_vars, path_to_save, + netcdf_format, append_column_units, append_standard_names, + append_long_names, dataset_type) + + def _assign_metadata(dataset, units_dict, standard_names_dict, long_names_dict): if units_dict is not None: - final_column_units = {} - final_column_units['samples'] = '' - final_column_units['observations'] = '' - final_column_units['samplingIndex'] = '' - final_column_units.update(units_dict) for var 
in dataset.variables: - dataset[var].attrs['units'] = final_column_units[var] + if var in units_dict: + dataset[var].attrs['units'] = units_dict[var] if standard_names_dict is not None: - final_std_names = {} - final_std_names['samples'] = '' - final_std_names['observations'] = '' - final_std_names['samplingIndex'] = '' - final_std_names.update(standard_names_dict) for var in dataset.variables: - dataset[var].attrs['standard_name'] = final_std_names[var] + if var in standard_names_dict: + dataset[var].attrs['standard_name'] = standard_names_dict[var] if long_names_dict is not None: final_long_names = {} final_long_names['samples'] = 'Sampling dimension' @@ -169,4 +217,5 @@ def _assign_metadata(dataset, units_dict, standard_names_dict, long_names_dict): final_long_names['samplingIndex'] = 'Index of station for this observation' final_long_names.update(long_names_dict) for var in dataset.variables: - dataset[var].attrs['long_name'] = final_long_names[var] + if var in final_long_names: + dataset[var].attrs['long_name'] = final_long_names[var] diff --git a/metpy/io/tests/test_pandas_to_netcdf.py b/tests/io/test_pandas_to_netcdf.py similarity index 51% rename from metpy/io/tests/test_pandas_to_netcdf.py rename to tests/io/test_pandas_to_netcdf.py index 6042079e997..c0755531386 100644 --- a/metpy/io/tests/test_pandas_to_netcdf.py +++ b/tests/io/test_pandas_to_netcdf.py @@ -27,6 +27,15 @@ def test_df(): 'station_id': pd.Series(['KFNL', 'KDEN', 'KVPZ', 'KORD'])}) +@pytest.fixture +def test_df2(): + """Create generic dataframe for appending.""" + return pd.DataFrame({ + 'temperature': pd.Series([20]), 'pressure': pd.Series([1010]), + 'latitude': pd.Series([40]), 'longitude': pd.Series([-65]), + 'station_id': pd.Series(['KLGA'])}) + + def test_dataframe_to_netcdf_basic(tmpdir): """Test dataframe conversion to netcdf.""" df = pd.read_csv(get_test_data('station_data.txt'), usecols=[0, 1, 2, 3, 4, 5]) @@ -35,8 +44,9 @@ def test_dataframe_to_netcdf_basic(tmpdir): 'air_pressure_at_sea_level[unit="hectoPascal"]': 'mean_sea_level_pressure', 'air_temperature[unit="Celsius"]': 'temperature'}) - dataframe_to_netcdf(df, path_to_save=str(tmpdir) + '/test.nc', sampling_var='station', - sampling_data_vars=['station', 'latitude', 'longitude']) + dataframe_to_netcdf(df, mode='w', path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station', sampling_data_vars=['station', 'latitude', + 'longitude']) assert os.path.exists(str(tmpdir) + '/test.nc') data = xr.open_dataset(str(tmpdir) + '/test.nc') assert np.max(data['temperature']) == 27 @@ -50,10 +60,10 @@ def test_dataframe_to_netcdf_units(tmpdir): 'air_pressure_at_sea_level[unit="hectoPascal"]': 'mean_sea_level_pressure', 'air_temperature[unit="Celsius"]': 'temperature'}) - col_units = {'samples': '', 'observations': '', 'samplingIndex': '', 'station': '', - 'latitude': 'degrees', 'longitude': 'degrees', 'temperature': 'degC', - 'mean_sea_level_pressure': 'hPa', 'time': ''} - dataframe_to_netcdf(df, path_to_save=str(tmpdir) + '/test.nc', sampling_var='station', + col_units = {'latitude': 'degrees', 'longitude': 'degrees', 'temperature': 'degC', + 'mean_sea_level_pressure': 'hPa'} + dataframe_to_netcdf(df, mode='w', path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station', sampling_data_vars=['station', 'latitude', 'longitude'], column_units=col_units, dataset_type='timeSeries') data = xr.open_dataset(str(tmpdir) + '/test.nc') @@ -69,7 +79,7 @@ def test_dataframe_to_netcdf_names(test_df, tmpdir): standard_names = {'temperature': 'air_temperature', 
'pressure': 'air_pressure_at_mean_sea_level', 'latitude': 'latitude', 'longitude': 'longitude', 'station_id': 'platform_id'} - dataframe_to_netcdf(test_df, path_to_save=str(tmpdir) + '/test.nc', + dataframe_to_netcdf(test_df, mode='w', path_to_save=str(tmpdir) + '/test.nc', sampling_var='station_id', sampling_data_vars=['station_id', 'latitude', 'longitude'], standard_names=standard_names, long_names=long_names) @@ -82,14 +92,51 @@ def test_no_dataframe(tmpdir): """Test error message if Pandas DataFrame is not provided.""" array = np.arange(0, 10) with pytest.raises(TypeError, match='A pandas dataframe was not provided'): - dataframe_to_netcdf(array, path_to_save=str(tmpdir) + '/test.nc', sampling_var=None, - sampling_data_vars=None) + dataframe_to_netcdf(array, mode='w', path_to_save=str(tmpdir) + '/test.nc', + sampling_var=None, sampling_data_vars=None) -def test_file_exists(test_df, tmpdir): - """Test error message if netCDF file already exists.""" - open(str(tmpdir) + '/test.nc', 'wb') - with pytest.raises(ValueError, match='File already exists - please delete and run again'): - dataframe_to_netcdf(test_df, path_to_save=str(tmpdir) + '/test.nc', +def test_invalid_mode_option(test_df, tmpdir): + """Test error message if an incorrect file mode is specified.""" + with pytest.raises(ValueError, match='Mode must either be "w" or "a".'): + dataframe_to_netcdf(test_df, mode='r', path_to_save=str(tmpdir) + '/test.nc', sampling_var='station_id', sampling_data_vars=['station_id', 'latitude', 'longitude']) + + +def test_append_basic(test_df, test_df2, tmpdir): + """Test appending to an existing file.""" + dataframe_to_netcdf(test_df, mode='w', path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station_id', + sampling_data_vars=['station_id', 'latitude', 'longitude']) + dataframe_to_netcdf(test_df2, mode='a', path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station_id', + sampling_data_vars=['station_id', 'latitude', 'longitude']) + data = xr.open_dataset(str(tmpdir) + '/test.nc') + assert 'KLGA' in data['station_id'] + assert data.dims['samples'] == 5 + assert data.dims['observations'] == 17 + + +def test_append_attributes(test_df, test_df2, tmpdir): + """Test appending dataset with existing attributes.""" + units = {'temperature': 'degC', 'pressure': 'hPa', 'latitude': 'degrees', + 'longitude': 'degrees'} + long_names = {'temperature': '2-meter air temperature', + 'pressure': 'Mean sea-level air pressure', 'latitude': 'Station latitude', + 'longitude': 'Station longitude', 'station_id': 'Station identifier'} + standard_names = {'temperature': 'air_temperature', + 'pressure': 'air_pressure_at_mean_sea_level', 'latitude': 'latitude', + 'longitude': 'longitude', 'station_id': 'platform_id'} + dataframe_to_netcdf(test_df, mode='w', path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station_id', + sampling_data_vars=['station_id', 'latitude', 'longitude'], + column_units=units, standard_names=standard_names, + long_names=long_names, dataset_type='timeSeries') + dataframe_to_netcdf(test_df2, mode='a', path_to_save=str(tmpdir) + '/test.nc', + sampling_var='station_id', + sampling_data_vars=['station_id', 'latitude', 'longitude']) + data = xr.open_dataset(str(tmpdir) + '/test.nc') + assert data.temperature.attrs['units'] == 'degC' + assert data.attrs['featureType'] == 'timeSeries' + assert data.station_id.attrs['cf_role'] == 'timeseries_id'
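For reference, a minimal end-to-end usage sketch of the API these two patches add. The calls mirror the test suite; the DataFrame contents and the output path are illustrative only, and a netCDF-capable xarray backend (netCDF4-python or scipy) is assumed to be installed:

    import pandas as pd
    from metpy.io import dataframe_to_netcdf

    # Four observations from four stations; each row is one observation
    df = pd.DataFrame({'temperature': [1, 2, 2, 3], 'pressure': [1, 2, 2, 3],
                       'latitude': [4, 5, 6, 7], 'longitude': [1, 2, 3, 4],
                       'station_id': ['KFNL', 'KDEN', 'KVPZ', 'KORD']})

    # mode='w' writes a new DSG file, overwriting anything at path_to_save
    dataframe_to_netcdf(df, mode='w', sampling_var='station_id',
                        sampling_data_vars=['station_id', 'latitude', 'longitude'],
                        path_to_save='stations.nc',
                        column_units={'temperature': 'degC', 'pressure': 'hPa',
                                      'latitude': 'degrees', 'longitude': 'degrees'},
                        dataset_type='timeSeries')

    # mode='a' appends to the existing file; metadata already stored in the
    # file is reused unless overridden by the keyword arguments
    new_obs = pd.DataFrame({'temperature': [20], 'pressure': [1010],
                            'latitude': [40], 'longitude': [-65],
                            'station_id': ['KLGA']})
    dataframe_to_netcdf(new_obs, mode='a', sampling_var='station_id',
                        sampling_data_vars=['station_id', 'latitude', 'longitude'],
                        path_to_save='stations.nc')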