From 2f9b89423c29cbf9572b83c00589bc99662fcb51 Mon Sep 17 00:00:00 2001
From: William Dutton
Date: Wed, 1 Nov 2023 08:51:50 +1000
Subject: [PATCH 1/5] Quality of life improvements
* Performance Improvement: Skip rows when showing the datastore load log when handling multi-million-line records
* Improve testing of mixed formatting, where type guessing causes bad loads when a field of type timestamp/numeric is empty (an empty string cannot be stored in these types; the value must be None)
* Include a shortcut to the DataStore tab, when data is loaded, for Package/Resource maintainers
* Include Dependabot auto-raising of PRs to help keep dependencies current
* Make flake8 more stringent
---
.flake8 | 4 -
.github/dependabot.yml | 18 ++
ckanext/xloader/action.py | 7 +-
ckanext/xloader/command.py | 3 +-
ckanext/xloader/config_declaration.yaml | 13 +-
ckanext/xloader/helpers.py | 19 ++
ckanext/xloader/jobs.py | 25 +-
ckanext/xloader/loader.py | 20 +-
ckanext/xloader/plugin.py | 33 +--
.../templates/package/resource_edit_base.html | 4 +-
.../templates/package/resource_read.html | 8 +
.../package/snippets/resource_item.html | 8 +
.../templates/package/snippets/resources.html | 8 +
.../templates/xloader/resource_data.html | 32 ++-
ckanext/xloader/tests/ckan_setup.py | 2 +-
ckanext/xloader/tests/fixtures.py | 5 +-
.../samples/mixed_numeric_string_sample.csv | 3 +
.../tests/samples/non_timestamp_sample.csv | 4 +
.../xloader/tests/samples/non_utf8_sample.csv | 267 ++++++++++++++++++
.../tests/samples/sample_with_blanks.csv | 4 +
.../samples/sample_with_mixed_quotes.csv | 136 +++++++++
.../samples/sample_with_quoted_commas.csv | 4 +
ckanext/xloader/tests/test_loader.py | 105 +++++++
ckanext/xloader/utils.py | 61 +++-
ckanext/xloader/views.py | 12 +-
25 files changed, 724 insertions(+), 81 deletions(-)
create mode 100644 .github/dependabot.yml
create mode 100644 ckanext/xloader/templates/package/resource_read.html
create mode 100644 ckanext/xloader/templates/package/snippets/resource_item.html
create mode 100644 ckanext/xloader/templates/package/snippets/resources.html
create mode 100644 ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
create mode 100644 ckanext/xloader/tests/samples/non_timestamp_sample.csv
create mode 100644 ckanext/xloader/tests/samples/non_utf8_sample.csv
create mode 100644 ckanext/xloader/tests/samples/sample_with_blanks.csv
create mode 100644 ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
create mode 100644 ckanext/xloader/tests/samples/sample_with_quoted_commas.csv
diff --git a/.flake8 b/.flake8
index a4eea9e3..32068ca7 100644
--- a/.flake8
+++ b/.flake8
@@ -17,8 +17,4 @@ max-line-length=127
# List ignore rules one per line.
ignore =
- E501
- C901
W503
- F401
- F403
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..b5158981
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,18 @@
+version: 2
+registries:
+ python-index-pypi-org:
+ type: python-index
+ url: https://pypi.org/
+ replaces-base: true
+ username: "${{secrets.PYTHON_INDEX_PYPI_ORG_USERNAME}}"
+ password: "${{secrets.PYTHON_INDEX_PYPI_ORG_PASSWORD}}"
+
+updates:
+- package-ecosystem: pip
+ directory: "/"
+ schedule:
+ interval: daily
+ time: "19:00"
+ open-pull-requests-limit: 10
+ registries:
+ - python-index-pypi-org
diff --git a/ckanext/xloader/action.py b/ckanext/xloader/action.py
index 3fa26803..e45394a9 100644
--- a/ckanext/xloader/action.py
+++ b/ckanext/xloader/action.py
@@ -152,7 +152,12 @@ def xloader_submit(context, data_dict):
'original_url': resource_dict.get('url'),
}
}
- timeout = config.get('ckanext.xloader.job_timeout', '3600')
+ # Expand timeout for resources that have to be type-guessed
+ timeout = config.get(
+ 'ckanext.xloader.job_timeout',
+ '3600' if utils.datastore_resource_exists(res_id) else '10800')
+ log.debug("Timeout for XLoading resource %s is %s", res_id, timeout)
+
try:
job = enqueue_job(
jobs.xloader_data_into_datastore, [data], rq_kwargs=dict(timeout=timeout)
diff --git a/ckanext/xloader/command.py b/ckanext/xloader/command.py
index 7f2c000a..64b79754 100644
--- a/ckanext/xloader/command.py
+++ b/ckanext/xloader/command.py
@@ -3,6 +3,7 @@
import sys
import logging
import ckan.plugins.toolkit as tk
+from ckanext.xloader.utils import XLoaderFormats
class XloaderCmd:
@@ -84,8 +85,6 @@ def _submit_resource(self, resource, user, indent=0):
'''resource: resource dictionary
'''
indentation = ' ' * indent
- # import here, so that that loggers are setup
- from ckanext.xloader.plugin import XLoaderFormats
if not XLoaderFormats.is_it_an_xloader_format(resource['format']):
print(indentation
diff --git a/ckanext/xloader/config_declaration.yaml b/ckanext/xloader/config_declaration.yaml
index b31f12e2..feb1cc9c 100644
--- a/ckanext/xloader/config_declaration.yaml
+++ b/ckanext/xloader/config_declaration.yaml
@@ -29,9 +29,7 @@ groups:
default: 1_000_000_000
example: 100000
description: |
- The connection string for the jobs database used by XLoader. The
- default of an sqlite file is fine for development. For production use a
- Postgresql database.
+ The maximum file size that XLoader will attempt to load.
type: int
required: false
- key: ckanext.xloader.use_type_guessing
@@ -48,6 +46,15 @@ groups:
type: bool
required: false
legacy_key: ckanext.xloader.just_load_with_messytables
+ - key: ckanext.xloader.max_type_guessing_length
+ default: 0
+ example: 100000
+ description: |
+ The maximum file size that will be passed to Tabulator if the
+ use_type_guessing flag is enabled. Larger files will use COPY even if
+ the flag is set. Defaults to 1/10 of the maximum content length.
+ type: int
+ required: false
- key: ckanext.xloader.parse_dates_dayfirst
default: False
example: False
diff --git a/ckanext/xloader/helpers.py b/ckanext/xloader/helpers.py
index 8c94387a..8b9dee8f 100644
--- a/ckanext/xloader/helpers.py
+++ b/ckanext/xloader/helpers.py
@@ -1,4 +1,5 @@
import ckan.plugins.toolkit as toolkit
+from ckanext.xloader.utils import XLoaderFormats
def xloader_status(resource_id):
@@ -25,3 +26,21 @@ def xloader_status_description(status):
return captions.get(status['status'], status['status'].capitalize())
else:
return _('Not Uploaded Yet')
+
+
+def is_resource_supported_by_xloader(res_dict, check_access=True):
+ is_supported_format = XLoaderFormats.is_it_an_xloader_format(res_dict.get('format'))
+ is_datastore_active = res_dict.get('datastore_active', False)
+ if check_access:
+ user_has_access = toolkit.h.check_access('package_update', {'id': res_dict.get('package_id')})
+ else:
+ user_has_access = True
+ url_type = res_dict.get('url_type')
+ if url_type:
+ try:
+ is_supported_url_type = url_type not in toolkit.h.datastore_rw_resource_url_types()
+ except AttributeError:
+ is_supported_url_type = (url_type == 'upload')
+ else:
+ is_supported_url_type = True
+ return (is_supported_format or is_datastore_active) and user_has_access and is_supported_url_type
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index 4c4068f9..9c6e0a67 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -7,6 +7,7 @@
import tempfile
import json
import datetime
+import os
import traceback
import sys
@@ -16,23 +17,26 @@
import sqlalchemy as sa
from ckan import model
-from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config, check_ckan_version
+from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config
-from . import loader
-from . import db
+from . import db, loader
from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError
-from .utils import set_resource_metadata
+from .utils import datastore_resource_exists, set_resource_metadata
try:
from ckan.lib.api_token import get_user_from_token
except ImportError:
get_user_from_token = None
+log = logging.getLogger(__name__)
+
SSL_VERIFY = asbool(config.get('ckanext.xloader.ssl_verify', True))
if not SSL_VERIFY:
requests.packages.urllib3.disable_warnings()
MAX_CONTENT_LENGTH = int(config.get('ckanext.xloader.max_content_length') or 1e9)
+# Don't try Tabulator load on large files
+MAX_TYPE_GUESSING_LENGTH = int(config.get('ckanext.xloader.max_type_guessing_length') or MAX_CONTENT_LENGTH / 10)
MAX_EXCERPT_LINES = int(config.get('ckanext.xloader.max_excerpt_lines') or 0)
CHUNK_SIZE = 16 * 1024 # 16kb
DOWNLOAD_TIMEOUT = 30
@@ -80,7 +84,6 @@ def xloader_data_into_datastore(input):
db.mark_job_as_errored(job_id, str(e))
job_dict['status'] = 'error'
job_dict['error'] = str(e)
- log = logging.getLogger(__name__)
log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
errored = True
except Exception as e:
@@ -88,7 +91,6 @@ def xloader_data_into_datastore(input):
job_id, traceback.format_tb(sys.exc_info()[2])[-1] + repr(e))
job_dict['status'] = 'error'
job_dict['error'] = str(e)
- log = logging.getLogger(__name__)
log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
errored = True
finally:
@@ -206,11 +208,12 @@ def tabulator_load():
logger.info('Loading CSV')
# If ckanext.xloader.use_type_guessing is not configured, fall back to
# deprecated ckanext.xloader.just_load_with_messytables
- use_type_guessing = asbool(config.get(
- 'ckanext.xloader.use_type_guessing', config.get(
- 'ckanext.xloader.just_load_with_messytables', False)))
- logger.info("'use_type_guessing' mode is: %s",
- use_type_guessing)
+ use_type_guessing = asbool(
+ config.get('ckanext.xloader.use_type_guessing', config.get(
+ 'ckanext.xloader.just_load_with_messytables', False))) \
+ and not datastore_resource_exists(resource['id']) \
+ and os.path.getsize(tmp_file.name) <= MAX_TYPE_GUESSING_LENGTH
+ logger.info("'use_type_guessing' mode is: %s", use_type_guessing)
try:
if use_type_guessing:
tabulator_load()
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index afc3c980..46c38dec 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -18,7 +18,7 @@
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
from .parser import XloaderCSVParser
-from .utils import headers_guess, type_guess
+from .utils import datastore_resource_exists, headers_guess, type_guess
from ckan.plugins.toolkit import config
@@ -318,9 +318,16 @@ def row_iterator():
logger.info('Copying to database...')
count = 0
+ # Some types cannot be stored as empty strings and must be converted to None,
+ # https://github.com/ckan/ckanext-xloader/issues/182
+ non_empty_types = ['timestamp', 'numeric']
for i, records in enumerate(chunky(result, 250)):
count += len(records)
logger.info('Saving chunk {number}'.format(number=i))
+ for row in records:
+ for column_index, column_name in enumerate(row):
+ if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '':
+ row[column_name] = None
send_resource_to_datastore(resource_id, headers_dicts, records)
logger.info('...copying done')
@@ -395,17 +402,6 @@ def send_resource_to_datastore(resource_id, headers, records):
.format(str(e)))
-def datastore_resource_exists(resource_id):
- from ckan import model
- context = {'model': model, 'ignore_auth': True}
- try:
- response = p.toolkit.get_action('datastore_search')(context, dict(
- id=resource_id, limit=0))
- except p.toolkit.ObjectNotFound:
- return False
- return response or {'fields': []}
-
-
def delete_datastore_resource(resource_id):
from ckan import model
context = {'model': model, 'user': '', 'ignore_auth': True}
diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index dbde8ed5..1b8417b9 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -6,7 +6,7 @@
from ckan.plugins import toolkit
from . import action, auth, helpers as xloader_helpers, utils
-from .loader import fulltext_function_exists, get_write_engine
+from ckanext.xloader.utils import XLoaderFormats
try:
config_declarations = toolkit.blanket.config_declarations
@@ -19,36 +19,6 @@ def config_declarations(cls):
log = logging.getLogger(__name__)
-# resource.formats accepted by ckanext-xloader. Must be lowercase here.
-DEFAULT_FORMATS = [
- "csv",
- "application/csv",
- "xls",
- "xlsx",
- "tsv",
- "application/vnd.ms-excel",
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- "ods",
- "application/vnd.oasis.opendocument.spreadsheet",
-]
-
-
-class XLoaderFormats(object):
- formats = None
-
- @classmethod
- def is_it_an_xloader_format(cls, format_):
- if cls.formats is None:
- cls._formats = toolkit.config.get("ckanext.xloader.formats")
- if cls._formats is not None:
- cls._formats = cls._formats.lower().split()
- else:
- cls._formats = DEFAULT_FORMATS
- if not format_:
- return False
- return format_.lower() in cls._formats
-
-
@config_declarations
class xloaderPlugin(plugins.SingletonPlugin):
plugins.implements(plugins.IConfigurer)
@@ -211,4 +181,5 @@ def get_helpers(self):
return {
"xloader_status": xloader_helpers.xloader_status,
"xloader_status_description": xloader_helpers.xloader_status_description,
+ "is_resource_supported_by_xloader": xloader_helpers.is_resource_supported_by_xloader,
}
diff --git a/ckanext/xloader/templates/package/resource_edit_base.html b/ckanext/xloader/templates/package/resource_edit_base.html
index 34403521..5c02815a 100644
--- a/ckanext/xloader/templates/package/resource_edit_base.html
+++ b/ckanext/xloader/templates/package/resource_edit_base.html
@@ -2,5 +2,7 @@
{% block inner_primary_nav %}
{{ super() }}
- {{ h.build_nav_icon('xloader.resource_data', _('DataStore'), id=pkg.name, resource_id=res.id) }}
+ {% if h.is_resource_supported_by_xloader(res) %}
+ {{ h.build_nav_icon('xloader.resource_data', _('DataStore'), id=pkg.name, resource_id=res.id, icon='cloud-upload') }}
+ {% endif %}
{% endblock %}
diff --git a/ckanext/xloader/templates/package/resource_read.html b/ckanext/xloader/templates/package/resource_read.html
new file mode 100644
index 00000000..56bf0266
--- /dev/null
+++ b/ckanext/xloader/templates/package/resource_read.html
@@ -0,0 +1,8 @@
+{% ckan_extends %}
+
+{% block action_manage %}
+ {{ super() }}
+ {% if h.is_resource_supported_by_xloader(res) %}
+ {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='btn btn-light', icon='cloud-upload' %}
+ {% endif %}
+{% endblock %}
diff --git a/ckanext/xloader/templates/package/snippets/resource_item.html b/ckanext/xloader/templates/package/snippets/resource_item.html
new file mode 100644
index 00000000..37ed457c
--- /dev/null
+++ b/ckanext/xloader/templates/package/snippets/resource_item.html
@@ -0,0 +1,8 @@
+{% ckan_extends %}
+
+{% block resource_item_explore_inner %}
+ {{ super() }}
+ {% if h.is_resource_supported_by_xloader(res) %}
+ {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='dropdown-item', icon='cloud-upload' %}
+ {% endif %}
+{% endblock %}
diff --git a/ckanext/xloader/templates/package/snippets/resources.html b/ckanext/xloader/templates/package/snippets/resources.html
new file mode 100644
index 00000000..e04dde4d
--- /dev/null
+++ b/ckanext/xloader/templates/package/snippets/resources.html
@@ -0,0 +1,8 @@
+{% ckan_extends %}
+
+{% block resources_list_edit_dropdown_inner %}
+ {{ super() }}
+ {% if h.is_resource_supported_by_xloader(resource) %}
+ {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=resource.id, class_='dropdown-item', icon='cloud-upload' %}
+ {% endif %}
+{% endblock %}
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index a94ad631..d9a22058 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -61,7 +61,25 @@
{% if status.status and status.task_info and show_table %}
{{ _('Upload Log') }}
+ {% elif loop.index == rows + 1 %}
+
+
+
+ Skipping {{ skipped_rows }} logs...
+
+
+ Show more Show all
+
+
+
+ {% endif %}
{% endfor %}
diff --git a/ckanext/xloader/tests/ckan_setup.py b/ckanext/xloader/tests/ckan_setup.py
index ae8bfb3e..ff43d74c 100644
--- a/ckanext/xloader/tests/ckan_setup.py
+++ b/ckanext/xloader/tests/ckan_setup.py
@@ -1,5 +1,5 @@
try:
- from ckan.tests.pytest_ckan.ckan_setup import *
+ from ckan.tests.pytest_ckan.ckan_setup import * # noqa
except ImportError:
import pkg_resources
from paste.deploy import loadapp
diff --git a/ckanext/xloader/tests/fixtures.py b/ckanext/xloader/tests/fixtures.py
index f43916ab..9a7ad37f 100644
--- a/ckanext/xloader/tests/fixtures.py
+++ b/ckanext/xloader/tests/fixtures.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
-import sqlalchemy
-import sqlalchemy.orm as orm
+from sqlalchemy import orm
import os
from ckanext.datastore.tests import helpers as datastore_helpers
@@ -11,7 +10,7 @@
)
try:
- from ckan.tests.pytest_ckan.fixtures import *
+ from ckan.tests.pytest_ckan.fixtures import * # noqa
except ImportError:
import pytest
diff --git a/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
new file mode 100644
index 00000000..9d076602
--- /dev/null
+++ b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
@@ -0,0 +1,3 @@
+Funding agency,Program title,Maximum (indicative) grant amount
+DTIS,Accessible Tourism Infrastructure Grants,Five hundred thousand dollars
+DTIS,Boosting Accessible Tourism Experiences Grants,5000
diff --git a/ckanext/xloader/tests/samples/non_timestamp_sample.csv b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
new file mode 100644
index 00000000..d1b39e90
--- /dev/null
+++ b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
@@ -0,0 +1,4 @@
+Title,Postal postcode,Latitude,Longitude,Mon am,Mon pm,Last updated
+Adavale,4474,-25.9092582,144.5975769,8:00,16:00,19/07/2018
+Aramac,4726,-22.971298,145.241481,9:00-13:00,14:00-16:45,17/07/2018
+Barcaldine,4725,-23.55327901,145.289156,9:00-12:30,13:30-16:30,20/07/2018
diff --git a/ckanext/xloader/tests/samples/non_utf8_sample.csv b/ckanext/xloader/tests/samples/non_utf8_sample.csv
new file mode 100644
index 00000000..334c1005
--- /dev/null
+++ b/ckanext/xloader/tests/samples/non_utf8_sample.csv
@@ -0,0 +1,267 @@
+"ClientId_ActNo","Owner","Amount","SenderName","DateRec","PCode"
+"206681442213","MS MARIE LOUISE SEXTON ","477.05","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3206"
+"206681442214","MR DAVID SHEARER","3.79","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2213"
+"206681442215","MRS M SHONK + MR E T SHONK ","10.3","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2093"
+"206681442216","MS AGATHA SKOURTIS","108.42","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3025"
+"206681442217","MR JAMES SMITH","108.42","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4811"
+"206681442218","MRS JILLIAN MELINDA SMITH","602.27","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2752"
+"206681442219","MISS JESSICA SARAH STEAD","174.01","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2040"
+"206681442220","MISS CHAU DONG MINH TANG","542.1","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3065"
+"206681442221","MR TROY TAYLOR","240.69","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4000"
+"206681442222","MR ANDREW PHILIP THOMPSON","2.17","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2204"
+"206681442223","MR IVAN CONRAD TIMBS","702.02","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2612"
+"206681442224","MR J WAJNTRAUB + MRS S WAJNTRAUB ","542.1","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3205"
+"206681442225","MR HOWARD GRENVILLE WEBBER","400.61","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4556"
+"206681442226","JANI ILARI KALLA","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6157"
+"206681442227","GARY JOHN & DESLEY L CAHILL","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4816"
+"206681442228","CARMEL ANASTASIA MEAGLIA","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2205"
+"206681442229","ASHLEY & ANNIE BRUGGEMANN","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4671"
+"206681442230","TERRY & MARY RITCHIE","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442231","BODY CORPORATE VILLAGE WAY CTS 19459","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442232","MATHEW JOHN SHORTLAND","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2573"
+"206681442233","TANYA MARIE TOWNSON","10.01","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442234","VENEE ELVA RUSSELL","10.02","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4035"
+"206681442235","ELIZABETH FERNANCE","10.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4223"
+"206681442236","CHARLES JOHN & OLWYN MARTIN","10.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4121"
+"206681442237","ALFRED BRETT SEILER","10.05","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4129"
+"206681442238","LOUISE WOODHAM & NATHAN FREY","10.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4400"
+"206681442239","MITRA KHAKBAZ","10.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4005"
+"206681442240","ALLAN EDWARD KILCULLEN","10.1","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442241","BEVAN JOHN LISTON","10.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442242","KRIS MICHAEL KANKAHAINEN","10.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4107"
+"206681442243","MICHAEL LYNN","10.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4860"
+"206681442244","ALAN RAYMOND & GERAL BURKITT","10.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4228"
+"206681442245","JENNIFER & NEVILLE MARXSEN","10.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4680"
+"206681442246","DARREN MAIN GRANT & LISA MARIE GROSSKOPF","10.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4504"
+"206681442247","PEARSON AUTOMOTIVE","10.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064"
+"206681442248","MR SHANE HOPE & MISS YVONNE HILTON","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4173"
+"206681442249","CARMEL LESLEY NEILSON & WAYNE MERVYN NEILSON &","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650"
+"206681442250","STEPHEN KENNETH ROBERTSON","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442251","SHIH CHE LIN","10.26","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442252","DAVID BRETT BROWNE","10.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4558"
+"206681442253","NEVILLE COLIN WOODHOUSE","10.32","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442254","DARRYN GREGORY & PET ROBIN","10.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442255","DUDLEY JESSER","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442256","MURRAY JOHN & SANDRA DIXON","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442257","SHATHISO JOHNSON BAREKI","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442258","ARTHUR EDWARD & MAUR MACDONALD","10.39","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4390"
+"206681442259","GARY GOLDBERG","10.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2480"
+"206681442260","PHUONG VAN NGO","10.41","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508"
+"206681442261","JACQUELYN WILSON","10.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3046"
+"206681442262","GARTH TURTON","10.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442263","DAVID JAMES & ANNE M O'ROURKE","10.43","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4701"
+"206681442264","ROBERT RUSSELL & VER MCKENZIE","10.45","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503"
+"206681442265","ESTATE OF DULCIE L SYKES","10.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442266","LEESA GAYE OSMOND","10.51","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4671"
+"206681442267","DAVID JOHN & ROSEMAR GILES","10.54","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4303"
+"206681442268","SALLY & AQEEL AHMED","10.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442269","JUDITH MARJORY BURGESS","10.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3101"
+"206681442270","TROY ANTONY EWART","10.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4301"
+"206681442271","RODULFO MANOY & GEORGE HAJEK","10.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152"
+"206681442272","GLEN DUNSTAN","10.66","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3196"
+"206681442273","ANNE RALSTON WRIGHT","10.73","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4825"
+"206681442274","ALAN & NICOLE MAREE JACKSON","10.74","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720"
+"206681442275","DANIEL MALCOLM BROWN","10.81","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4501"
+"206681442276","JENNIFER DEMERAL","10.82","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442277","DARREN & LISA GARRETT","10.83","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442278","LORRAINE & PETER JACKSON","10.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442279","CHERYL MADELINE CAMPBELL","10.86","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4824"
+"206681442280","OLAF PETER PRILL","10.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305"
+"206681442281","AJAY GIDH","10.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442282","DEBRA JOANNE PRINDABLE","10.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442283","MATTHEW WILLIAM CLARKE","10.96","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2914"
+"206681442284","MARK STANLEY MCKENZIE","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442285","TREVOR & JANICE GARWOOD","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4610"
+"206681442286","LISA ANNE BRATINA","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4228"
+"206681442287","MICHAEL GEORGE KIRKWOOD","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4561"
+"206681442288","STEPHAN & JULIE BAWDEN","11.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4114"
+"206681442289","PETER JOHN BOURKE","11.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4118"
+"206681442290","TYRONE PAGE & ULRIKE","11.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4301"
+"206681442291","SIMON ROBERT GRAY","11.08","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4006"
+"206681442292","ALLAN NICHOLAS SCHWARZROCK","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650"
+"206681442293","IVAN J BLAKE & JAINE RIGTER","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220"
+"206681442294","DAVID MATTHEW REGINA CHRISTIE","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4151"
+"206681442295","GEOFFREY WAYNE & EVAN GRIGG","11.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720"
+"206681442296","KYLIE JANELLE HARDCASTLE","11.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4013"
+"206681442297","PAMELA ANN WELLER","11.15","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4655"
+"206681442298","JASON PATRICK & ELIZ MURPHY","11.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4511"
+"206681442299","MLADEN & VESNA SAJKO","11.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4157"
+"206681442300","DEAN STEPHEN BROCKENSHIRE","11.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2571"
+"206681442301","LISA CHRISTOBEL BOWKER","11.22","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4066"
+"206681442302","MATTHEW RAY EBBAGE","11.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101"
+"206681442303","BRIAN & GEORGINA WHITLEY","11.25","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4521"
+"206681442304","HAYLEY WESTON","11.25","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4159"
+"206681442305","JAMES PATRICK HOCKING","11.28","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4127"
+"206681442306","ROBERT ANDREW & SARA BROWNHALL","11.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442307","EDWARD JAMES DODGSON","11.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442308","MELISSA JOY DODD","11.32","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442309","JOSHUA CALVIN BEGENT","11.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306"
+"206681442311","DORATHY AMANDA WALTERS","11.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4744"
+"206681442312","RICHARD ROBERTS & KYM RALEIGH","11.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4053"
+"206681442313","SAMARA INSOLL","11.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4212"
+"206681442314","NEIL GREGORY FLESSER","11.49","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4800"
+"206681442315","EUNICE GLADYS WILBRAHAM","11.51","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570"
+"206681442316","KARA NICOLE MCINNES","11.57","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503"
+"206681442317","DAVID BLYTH","11.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4078"
+"206681442318","KEVIN & MARION KEIR","11.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4216"
+"206681442319","FRANCES & CHARLES KEEBLE","11.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4500"
+"206681442320","LYNETTE ANNE & PETER NISSEN","11.6","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442321","DANIEL PETER JOHNSON","11.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442322","ALLAN & EUNICE DELLAWAY","11.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122"
+"206681442323","CHRISTOPHER JOHN BEEM","11.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101"
+"206681442324","DAVID JAMES & KELLIE POULTON","11.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442325","MAVIS CAROLIN SCOTT","11.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4018"
+"206681442326","REEGAN & ADAM MARTIN","11.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2153"
+"206681442327","DENYSE B BONNEY","11.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811"
+"206681442328","JAMES ANDERSON","11.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220"
+"206681442329","SUSANNAH PINTER","11.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4744"
+"206681442330","BRENTON MARK & KAREN GARNETT","11.78","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306"
+"206681442331","PL CAMELOT VENTURES AS TRUSTEE FOR K F T TRUST NO","11.82","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442332","RON HENRY SCHMIDT","11.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","830"
+"206681442333","ROSS COCKBURN & AUDREY KILL","11.86","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4010"
+"206681442334","BENJAMIN CLARK","11.88","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306"
+"206681442335","IRIS LEAH TERESA BAKER","11.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2170"
+"206681442336","MARK JOHN DEEBLE","11.94","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442337","CHRISTINE & BARRY RIGBY","11.94","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2485"
+"206681442338","NATASHA ANN WOODWARD","11.97","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4869"
+"206681442339","BENJAMIN JOHN CANSDALE","11.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064"
+"206681442340","PETER HERALD","11.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4184"
+"206681442341","SIMON CUSHWAY","11.99","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154"
+"206681442342","ANTHONY & MICHELLE JOHNSTON","12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4551"
+"206681442343","PAUL HAUCK","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000"
+"206681442344","RONALD ALBERT & PEAR NORTHILL","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4413"
+"206681442345","ROBYN ELLEN SOMERS","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442346","ROSE ANN HODGMAN","12.06","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4068"
+"206681442347","JOHN & MARDI BOLTON","12.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442348","KRYSTYNA RENNIE","12.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4053"
+"206681442349","JOANNE BARSBY","12.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442350","BRENDAN JAMES FELSCHOW","12.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508"
+"206681442351","MARTIN WILLIAM HARRISON","12.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442352","PATRICK HEINEMANN","12.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442353","ELEKRA & SPENCER RORIE","12.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442354","ROBERT CLIVE & NOELE CROCKER","12.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442355","DANIEL JOSEPH & DAVI CARMICHAEL","12.21","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4160"
+"206681442356","WENBO JIANG & XIU FAN CHEN","12.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4303"
+"206681442357","NOEL JEFFREY BRADY","12.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4550"
+"206681442358","DARREN RICHARD GOSSNER & MATTHEW JOHN ANDERSON","12.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4500"
+"206681442359","STEPHEN MICHAEL & MA JOLLY","12.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442360","SHONA & ARCHIE WALLACE","12.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4504"
+"206681442361","ZOFIA HYS","12.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300"
+"206681442362","PIROSKA KING","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154"
+"206681442363","ARVIN CHAND & AMITA MOHINI","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503"
+"206681442364","WIETSKE GERARDINA & GAUNT","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4309"
+"206681442365","MARK REGINALD MATTHEWS","12.39","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811"
+"206681442366","SHARP ARLEEN & CLINTON","12.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6020"
+"206681442367","EMOKE & LASZLO & MAR ZSOLDOS","12.41","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4005"
+"206681442368","MARK & KARON KELLER","12.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4702"
+"206681442369","JODIE KATRINA & TONY MCLACHLAN","12.43","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442370","ALAN WARWICK & LINDA LEWIS","12.45","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4670"
+"206681442371","ADRIAN WAYNE LORRAWAY","12.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4702"
+"206681442372","NICHOLE KRISTY MIKLOS","12.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152"
+"206681442373","NATASHA LEANNE HAYES","12.54","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4017"
+"206681442374","KAREN LEE & DARREN J SHEEHAN","12.55","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4516"
+"206681442375","RACHAEL MAY COLLINS-COOK","12.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442376","TAMARA JUNE WEIGHT & SUSANNE ELIZABETH DEVINE","12.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442377","RODNEY GATES","12.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","7015"
+"206681442378","REBECCA & LEE-ANNE SMITH","12.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","830"
+"206681442379","ADAM WILLIAM JOHNSON","12.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442380","ZAC ASHLEY & ALEXAND MORGAN","12.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442381","HILARY SEALY","12.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442382","NAOMI JOHNSTONE & SCOTT LENAN","12.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442383","WAYNE FLICKER","12.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2756"
+"206681442384","BRENDA ANDERSON","12.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811"
+"206681442385","MATTHEW JAMES ALLEN","12.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4002"
+"206681442386","MARIA-THERESIA ALTENHOFEN-CROSS & JOHN ERI CROSS","12.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570"
+"206681442387","MELODIE ZYLSTRA","12.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570"
+"206681442388","AMANDA & GRAHAM SWALLOW","12.75","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720"
+"206681442389","GRAEME ROBERT & ROBI DOHERTY","12.75","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442390","GILLIAN LEIGH O'SULLIVAN","12.79","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442391","JULIA MELLICK","12.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442392","TOLISIALE & HAMAKO MAHINA","12.87","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300"
+"206681442393","SIMON JOHN STEVENS","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305"
+"206681442394","MICHAEL ANTHONY & DE SNELSON","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442395","QUERIDA JO LOFTES","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4812"
+"206681442396","LORRAINE VICTORIA DIAS","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000"
+"206681442397","JOHN MICHAEL TRAVIS LINLEY","12.92","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442398","CAROLINE HENDRY & RICHARD HOPKINS","12.93","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4067"
+"206681442399","JOSH EAGLE","12.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4030"
+"206681442400","MARK SHAWN FROST & BELINDA JEAN MARSHALL","12.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4221"
+"206681442401","BRENT & GABRIELLE ANTHONY","12.96","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4213"
+"206681442402","RICHARD SADLER","12.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4065"
+"206681442403","GROVE FRUIT JUICE PTY LTD","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122"
+"206681442404","LEAH SPARKS","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4561"
+"206681442405","JAMES MAURICE & PATR GORDON","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442406","MARK JOSEPH SEARS","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4565"
+"206681442407","SOPHIE VICTORIA STEWART & TREVOR MATTHEW ROWE","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4055"
+"206681442408","BOBBY JAMES & SIMONE TAYLOR","13.02","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6254"
+"206681442409","PATRICK MICHAEL & ME REEVES","13.08","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101"
+"206681442410","MAURICE GROGNUZ","13.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4670"
+"206681442411","ALAN PIGOTT & ALAN CONDER","13.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2025"
+"206681442412","SAMANTHA & CAMERON SCHELBACH","13.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4309"
+"206681442413","SHERIDAN ANNE ST CLAIR","13.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4164"
+"206681442414","ANDREW CHRISTIE","13.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4521"
+"206681442415","MARK ANDREW & MELISS VINTON","13.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508"
+"206681442416","IRWIN DOUGLAS & MARI SORENSEN","13.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305"
+"206681442417","CARLY SUSAN BENNETTS","13.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4034"
+"206681442418","RYAN THORNTON","13.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2560"
+"206681442419","RICHARD BAILEY","13.26","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3850"
+"206681442420","DAVID IAN & EMILY RU PRYOR","13.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4120"
+"206681442421","WILLIAM SINCLAIR","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4514"
+"206681442422","CATHERINE LUCILLE VALENTINE & ROBERT WAREING","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442423","RAYMOND JAMES JONES","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170"
+"206681442424","ANDREW STEWART T/A AWE COMMUNICATIONS","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442425","TONY RONALD OSBOURNE","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4311"
+"206681442426","MARK JOHN & LENY FIG O'HARA","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4825"
+"206681442427","CECILIA ASHLEY & DAV BUTLER","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4816"
+"206681442428","WILLIAM LEATHAM","13.36","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170"
+"206681442429","MAXWELL RAYMOND MATHERS & DENISE MAREE MELLARE","13.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4129"
+"206681442430","RENE & JACQUELINE WASSERFUHR","13.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4556"
+"206681442431","MICHAEL LEIGH KENNEDY","13.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4610"
+"206681442432","MEDECO MEDICAL CENTRE BEENLEIGH","13.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442433","GARY PAUL & GAYE SHELLEY","13.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4510"
+"206681442434","STEVE & BRENDA GEIGER","13.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442435","GREGORY BERNARD JAMES","13.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442436","ROBBIE DEEBLE","13.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442437","OWEN TRAYNOR","13.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6076"
+"206681442438","TONI MICHELLE & SHAN MORGAN","13.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4717"
+"206681442439","NICOLAS VAN HORTON","13.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220"
+"206681442440","IAN BOWDEN","13.6","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4886"
+"206681442441","QUEENSLAND COUNTRY CREDIT UNION - JIMBOOMBA","13.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442442","ALANA FELLINGHAM","13.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4551"
+"206681442443","ALLAN JOHN & CARMEL BETHEL","13.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4871"
+"206681442444","PETER WILLIAM & ODET NORMAN","13.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442445","EMILY & MATTHEW PARSLOW","13.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4173"
+"206681442446","JAMES OI YUEN GOCK","13.69","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2049"
+"206681442447","JODIE ELIZABETH MORRISON","13.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4280"
+"206681442448","BELINDA JANE HARNETT-PETERS & RANDALL NEI PETERS","13.74","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4017"
+"206681442449","JULIEN & CHRISTIAN JUVIGNY","13.78","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442450","SUSAN JOY MURRAY & THOMAS HOGAN","13.79","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4217"
+"206681442451","PATRICK COLIN & HEAT HARRIS","13.8","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4506"
+"206681442452","LINDY BOTHA","13.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154"
+"206681442453","PATRICIA LORETTA & D KNIGHT","13.85","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650"
+"206681442454","COWBURN CONSULTING PTY LTD","13.87","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000"
+"206681442455","SPENCER JAMES HAMILTON","13.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300"
+"206681442456","ANNA LOUISE ROSS","13.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170"
+"206681442457","JOHN HUGH & BOB SUTHERLAND","13.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4509"
+"206681442458","ROBERTA MARY MACNEE","13.99","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4567"
+"206681442459","MATTHEW CHRISTENSEN","14.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152"
+"206681442460","TROY & KIRSTY JEFFRIES","14.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4370"
+"206681442461","WILLIAM GEORGE BALSDON","14.05","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4878"
+"206681442462","JAIME LISA CAMPBELL & DANIEL BEVERIDGE","14.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4216"
+"206681442463","NANCY JOHANNESSON","14.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4505"
+"206681442464","JOSHUA FRANK SEIDL","14.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122"
+"206681442465","DAVID LESTER","14.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442466","MATHIAS DONALD","14.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4103"
+"206681442467","GLEN EVAN & HAYLEE L MARTIN","14.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442468","JOHN GORDON EVANS","14.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442469","DIANA NOYCE & LAURENCE VIZER T/A","14.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4502"
+"206681442470","GREIG MANLEY","14.22","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3040"
+"206681442471","BRENDON ANSELL","14.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4171"
+"206681442472","CATHERINE A ROBERTSON & PAUL BROMILEY","14.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064"
+"206681442473","ADAM LEE & SAMANTHA RANKIN","14.28","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4132"
+"206681442474","BERNICE BOYS","14.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4011"
+"206681442475","HAYLEY MICHELLE BURROW","14.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2153"
+"206681442476","SIONE FAUMUINA","14.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4815"
+"206681442477","GERARD JARMAN","14.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3337"
+"206681442478","DOUGLAS CECIL GOOLEY","14.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2481"
+"206681442479","ANTHONY AUGUSTO HENRIQUES T/A CAFÚ VILA FRANCA","14.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4020"
diff --git a/ckanext/xloader/tests/samples/sample_with_blanks.csv b/ckanext/xloader/tests/samples/sample_with_blanks.csv
new file mode 100644
index 00000000..b53b25db
--- /dev/null
+++ b/ckanext/xloader/tests/samples/sample_with_blanks.csv
@@ -0,0 +1,4 @@
+Funding agency,Program title,Opening date,Service ID
+DTIS,Visitor First Experiences Fund,23/03/2023,63039
+DTIS,First Nations Sport and Recreation Program Round 2,22/03/2023,63040
+,,,63041
diff --git a/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
new file mode 100644
index 00000000..a9527cf7
--- /dev/null
+++ b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
@@ -0,0 +1,136 @@
+Category,Category name,Priority,Initiative name,Investment objectives,Primary digital priority,Initiative stage,Actual start date,Approved end date,Date data current at,Percentage complete,Overall status,Project commencement allocation,Approved expenditure,Actual cost to date,Scope change event,Cost re-evaluation event,Delivery delay event,Project journey and reasons for variance,Learn more (URL)
+DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Silly Walks project - Stage 2,"Lorum ipsum.",Collaboration,Delivery,01/07/1970,30/06/1971,31/03/1971,41,G,5633000,5739000,2352000,N,N,N,"As at 31 March 1971
+- Overall 'green' (on track) status
+- Revised user journey following results of Silly Walk UX/UI testing
+- Transition to support progressing with documentation and walk-through of the solution.
+- Ongoing high levels of silly walk usage reflecting the success of search engine marketing. Silly walk focused campaign to further increase awareness and usage is being finalised.
+
+As at 28 February 1971
+- Overall 'green' (on track) status
+- Results of Silly Walk UX/UI testing is guiding development of the revised user journey.
+- Silly Walk transition to BAU support continuing with workshops, showcases and handover documentation.
+- Silly Walk usage is increasing
+
+As at 31 January 1971
+- Continued amber status [closely monitored] with risks under management
+- Search Engine Marketing -'Always On' yielding good results with continued increase in users and the proportion benefitting from Silly Walk
+- Good progress on development of revised Silly Walk user journey.
+
+As at 31 December 1970
+Status AMBER [Closely monitored]
+- Search Engine Marketing commenced 19 December 1970 and already showing increased users and proportion of customers benefitting from Silly Walk
+- External assurance review completed and reported 'green' rating for confidence of delivery.
+
+As at 30 November 1970
+- Continued amber status pending risk management
+- Marketing to commence to increase awareness of platform
+- Good progress on development of revised user journey
+
+As at 31 October 1970
+Status AMBER [Closely monitored]
+- Silly Walk Stage 2 continue reporting amber status reflective of ongoing high-level risks associated with demand-driven labour-market conditions and planned transition to support.
+- Communications and engagement are in progress.
+- The revised user journey continues development and testing. This is planned to be ready for release in the first quarter of 1971. As at 30 September 1970
+Status AMBER [Closely monitored]
+Project journey events:
+- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress.
+- Silly Walk industries expanded to include all industries.
+- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion to encompass all industries.
+
+As at 31 August 1970
+Status GREEN [On track]
+The project is reporting green overall. Ongoing resourcing risk will continue to be monitored and managed for the life of the project, due to a tight labour market.
+Project journey events:
+- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress.
+- Further analysis of June/July 1970 marketing campaign has offered recommendations for consideration, to improve target audience awareness and Silly Walk uptake.
+- Silly Walk industries expanded to include Retail Trade, Accommodation and Non-residential Construction industries finalised.
+- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion with three additional industries.
+
+As at 31 July 1970
+Status AMBER [Closely monitored]
+The project is continuing to report amber overall mainly due to ongoing resourcing challenges.
+Project journey events:
+- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness, is progressing.
+- Analysis of a major marketing campaign conducted in June/July 1970 showed a significant step-up in number of Silly Walk users.
+- The target of 95% of Circus population coverage was met in June 1970 with 100% of Circus population now covered on Silly Walk.
+- Agency engagement for extension industries has commenced.
+
+As at 1 July 1970
+Silly Walk commenced work on expanding industries to include Retail Trade, Accommodation and Non-residential Construction industries.
+
+As at June 1970
+Stage 2 of the project is commencing and will build up the solution delivered in Silly Walk Stage 1. Customer journey will be revised in line with outcome of customer testing. The increased coverage target of at least 95% of the Circus population was met in June 1970, with all local governments included on Silly Walk. Benefits realisation through marketing and promotion of Silly Walk.",https://example.com
+DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Flying Circus Modernisation and Transformation Program - Tranche 1,"The Flying Circus Modernisation and Transformation (FCMT) Program seeks to reduce the risks associated with department legacy systems by delivering contemporary, consolidated, integrated, user-friendly applications to support delivery of Flying Circus outcomes. To optimise the technical capabilities of the new solutions, engagement with business teams in the review and development of business processes is a priority. ",Trust,Delivery,01/07/1969,31/08/1971,28/02/1971,52,G,8692200,9614968,4961147,Y,Y,Y,"As at 28 February 1971
+- Tranche 1 FCMT projects continue on schedule and on budget for Tranche 1 completion by 31 August 1971.
+- Customer Engagement and Contract Establishment projects continue to progress focusing on delivery activities for new CRM and Portal enhancements.
+- FCMT Tranche 2 Business Case tracking for completion April 1971.
+
+As at 31 January 1971
+- FCMT Projects continue to track to schedule and on budget for Tranche 1 completion 31 August 1971.
+- Customer Engagement and Contract Establishment Projects progressing well with delivery activities for new CRM and Portal enhancements.
+
+As at 31 December 1970
+Status GREEN
+- FCMT projects continuing to track to board endorsed updated schedule and on budget for Tranche 1 completion on 31 August 1971.
+- Customer Engagement and Contract Establishment projects completed partner onboarding and delivery activities underway.
+- Planning in progress for Tranche 2, focusing on remaining legacy systems for planned commencement at completion of Tranch 1.
+
+As at 30 November 1970
+Status GREEN
+- Tranche 1 delivery date extended to 31 August 1971 due to CRM vendor procurement delays and subsequent additional time requirements for build completion and testing of new CRM.
+- All projects maintaining momentum and progressing to revised schedule within budget.
+
+As at 31 October 1970
+Status GREEN
+-New 'Partner Portal' Digital Channel continues to perform well with 3516 registered, active, external users from 634 different organisations. Update release being planned for January 1971.
+-SkillsCRM (CEP Project) delivery partner on-boarded and formal delivery stage commenced.
+-Contract Establishment and Variation (CEV PRoject) continuing delivery partner select with a view to commencing prior to end of December 1970.
+
+As at 30 September 1970 Status GREEN.
+The FCMT 'Partner Portal' solution was successfully launched on the 17 August 1970. The decommissioning of the outdated legacy application, 'WalkConnect', has completed. Work is now increasing on the next Flying Circus systems to be replaced, SkillsCRM (via the Customer Engagement Project) and Policy on Line (via the Contract Establishment and Variation Project).
+Project Journey Events:
+- Partner Portal. After the successful launch of Partner Portal and decommissioning of WalkConnect, the transition to BAU is underway with the Project team continuing to support business until BAU transition is completed.
+- Data, Infrastructure and Reporting.
+New 'Data Lake' infrastructure built. Data ingestion processes being trialled. QTS report requirement gathering underway which will showcase new capability once completed. Compliance tool SMCM successfully launched September 30.
+-Customer Engagement Project (CEP). Completed assurance reviews successfully. Delivery partner selection completed. Partner and formal delivery stage due to start 18 October 1970. Ramp up of activities continuing with business demonstrations of CRM proof of concept.
+-Contract Establishment and Variation (CEV).
+Requirements gathering completed. Delivery partner selection process commenced. 'As is' process documentation underway.
+
+As at 31 August 1970
+Status GREEN. The project remains on track. Successful launch of new secure 'Partner Portal' Digital Channel for Flying Circus related organisations occurred 17 August 1970.
+
+Current Projects underway:
+- Partner Portal. Go-live occurred on track 17 August 1970. All registered Flying Circus organisations now able to use the portal to access key applications and send information to DDSSHHESW via secure channel. Enhanced support being provided for 6 weeks. Legacy system decommissioning underway.
+- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) continuing and requirement gathering of first report planned to use new capabilites commenced.
+- Customer Services Hub (CRM). Implementation partner selection complete. Solution delivery activities due to start by end September 1970.
+- Contract Engagement and Variation. Requirements gathering complete and partner selection process to commence by end September 1970.
+
+As at 31 July 1970
+Status GREEN
+
+Project journey events:
+Implementation of next changes to FCMT applications remain on track for August 1970 with full launch of new secure Partner Portal Digital Channel for Flying Circus related organisations.
+FCMT Program scope adjusted to include additional at risk system decommission activties during this financial year. Approved expenditure updated to align with revised scope.
+
+Current Projects underway
+- Partner Portal. Opened for registrations 4 July 1970. Majority of Flying Circus related organisation now registered. Full access (go-live) on track to commence 17 August 1970. Legacy system to be disabled and decommissioned September 1970.
+- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) underway with population and work on first report to commence in September.
+- Customer Services Hub (CRM). Requirements confirmed and partner selection underway. Work on legacy CRM replacement due to start September/October 1970.
+- Contract Engagement and Variation. Requirements gathering and new process design activities in progress.
+
+15 May 1970 Update
+Status GREEN
+
+Implementation of next changes to Flying Circus applications on track for August 1970 with introduction of new secure 'Silly Portal' Digital Channel for Flying Circus related organisations.
+
+Projects Completed
+-Database consolidation - key databases transitioned to supported versions and platforms. Completed November 1969.
+-System to System Integration platform. Completed 9 May 1970.
+
+Current projects underway
+-Partner Portal secure digital channel, in final testing. Pilot successfully complete and on track for release in August 1970.
+Projects in startup
+-Data, Infrastructure and Reporting, planning underway.
+-Customer Services Hub (CRM), planning underway.
+-Contract Engagement and Variation, planning underway.
+-Planning continues for Tranche 2.",https://example.com
diff --git a/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv b/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv
new file mode 100644
index 00000000..7fe94e5b
--- /dev/null
+++ b/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv
@@ -0,0 +1,4 @@
+Funding agency,Program title,Opening date,Service ID
+DTIS,"Department of Employment, Small Business and Training",23/03/2023,63039
+DTIS,"Foo, baz, meh",22/03/2023,63040
+,,,63041
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 59e95e77..ffb3dcba 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -620,6 +620,54 @@ def test_german(self, Session):
u"tsvector",
] + [u"text"] * (len(records[0]) - 1)
def test_with_blanks(self, Session):
    """Rows containing empty cells must still load into the datastore."""
    sample_path = get_sample_filepath("sample_with_blanks.csv")
    res_id = factories.Resource()["id"]
    loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    # 3 data rows in the sample, including the one that is mostly blank.
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 3
+
def test_with_quoted_commas(self, Session):
    """Commas inside quoted fields must not split the row."""
    sample_path = get_sample_filepath("sample_with_quoted_commas.csv")
    res_id = factories.Resource()["id"]
    loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    # All 3 data rows survive despite embedded commas.
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 3
+
def test_with_mixed_quotes(self, Session):
    """Multiline quoted fields mixed with plain fields load correctly."""
    sample_path = get_sample_filepath("sample_with_mixed_quotes.csv")
    res_id = factories.Resource()["id"]
    loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    # The sample holds exactly 2 (very long, multiline) records.
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 2
+
def test_with_mixed_types(self, Session):
    """Columns mixing numeric-looking and string values load without error."""
    sample_path = get_sample_filepath("mixed_numeric_string_sample.csv")
    res_id = factories.Resource()["id"]
    loader.load_csv(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 2
+
def test_reload(self, Session):
csv_filepath = get_sample_filepath("simple.csv")
resource = factories.Resource()
@@ -1143,3 +1191,60 @@ def test_no_entries(self):
mimetype="csv",
logger=logger,
)
+
def test_with_quoted_commas(self, Session):
    """load_table keeps quoted-comma fields intact."""
    sample_path = get_sample_filepath("sample_with_quoted_commas.csv")
    res_id = factories.Resource()["id"]
    loader.load_table(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 3
+
def test_with_iso_8859_1(self, Session):
    """A non-UTF-8 (ISO-8859-1) file loads all of its rows."""
    sample_path = get_sample_filepath("non_utf8_sample.csv")
    res_id = factories.Resource()["id"]
    loader.load_table(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    # 266 data rows in the Latin-1 encoded fixture.
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 266
+
def test_with_mixed_quotes(self, Session):
    """load_table handles multiline quoted fields mixed with plain ones."""
    sample_path = get_sample_filepath("sample_with_mixed_quotes.csv")
    res_id = factories.Resource()["id"]
    loader.load_table(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    loaded = self._get_records(Session, res_id)
    assert len(loaded) == 2
+
def test_preserving_time_ranges(self, Session):
    """Time ranges such as "9:00-13:00" must stay text, not be coerced to
    timestamps, while genuine date columns are still typed as timestamps.
    """
    sample_path = get_sample_filepath("non_timestamp_sample.csv")
    res_id = factories.Resource()["id"]
    loader.load_table(
        sample_path,
        resource_id=res_id,
        mimetype="text/csv",
        logger=logger,
    )
    expected = [
        (1, "Adavale", 4474, Decimal("-25.9092582"), Decimal("144.5975769"),
         "8:00", "16:00", datetime.datetime(2018, 7, 19)),
        (2, "Aramac", 4726, Decimal("-22.971298"), Decimal("145.241481"),
         "9:00-13:00", "14:00-16:45", datetime.datetime(2018, 7, 17)),
        (3, "Barcaldine", 4725, Decimal("-23.55327901"), Decimal("145.289156"),
         "9:00-12:30", "13:30-16:30", datetime.datetime(2018, 7, 20)),
    ]
    assert self._get_records(Session, res_id) == expected
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index cbffaa2f..994e6754 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -9,9 +9,39 @@
from decimal import Decimal
import ckan.plugins as p
-
-
-def resource_data(id, resource_id):
+from ckan.plugins.toolkit import config
+
+# resource.formats accepted by ckanext-xloader. Must be lowercase here.
+DEFAULT_FORMATS = [
+ "csv",
+ "application/csv",
+ "xls",
+ "xlsx",
+ "tsv",
+ "application/vnd.ms-excel",
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ "ods",
+ "application/vnd.oasis.opendocument.spreadsheet",
+]
+
+
class XLoaderFormats(object):
    """Resource formats that xloader will attempt to load.

    The accepted format list is read once from the
    ``ckanext.xloader.formats`` config option (whitespace-separated,
    case-insensitive) and cached on the class; when the option is unset,
    ``DEFAULT_FORMATS`` is used.
    """

    # Cached lowercase format list; populated lazily on first lookup.
    formats = None

    @classmethod
    def is_it_an_xloader_format(cls, format_):
        """Return True if ``format_`` (case-insensitive) is accepted.

        Falsy values (None, empty string) are never accepted.
        """
        if cls.formats is None:
            # BUG FIX: the original tested ``cls.formats`` but assigned
            # ``cls._formats``, so the guard was always True and the config
            # was re-read and re-split on every call. Use one attribute
            # consistently so the cache actually engages.
            configured = config.get("ckanext.xloader.formats")
            if configured is not None:
                cls.formats = configured.lower().split()
            else:
                cls.formats = DEFAULT_FORMATS
        if not format_:
            return False
        return format_.lower() in cls.formats
+
+
+def resource_data(id, resource_id, rows=None):
if p.toolkit.request.method == "POST":
try:
@@ -44,13 +74,16 @@ def resource_data(id, resource_id):
except p.toolkit.NotAuthorized:
return p.toolkit.abort(403, p.toolkit._("Not authorized to see this page"))
+ extra_vars = {
+ "status": xloader_status,
+ "resource": resource,
+ "pkg_dict": pkg_dict,
+ }
+ if rows:
+ extra_vars["rows"] = rows
return p.toolkit.render(
"xloader/resource_data.html",
- extra_vars={
- "status": xloader_status,
- "resource": resource,
- "pkg_dict": pkg_dict,
- },
+ extra_vars=extra_vars,
)
@@ -175,10 +208,10 @@ def type_guess(rows, types=TYPES, strict=False):
for ci, cell in enumerate(row):
if not cell:
continue
- at_least_one_value[ci] = True
for type in list(guesses[ci].keys()):
if not isinstance(cell, type):
guesses[ci].pop(type)
+ at_least_one_value[ci] = True if guesses[ci] else False
# no need to set guessing weights before this
# because we only accept a type if it never fails
for i, guess in enumerate(guesses):
@@ -212,3 +245,13 @@ def type_guess(rows, types=TYPES, strict=False):
guesses_tuples = [(t, guess[t]) for t in types if t in guess]
_columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0])
return _columns
+
+
+def datastore_resource_exists(resource_id):
+ context = {'model': model, 'ignore_auth': True}
+ try:
+ response = p.toolkit.get_action('datastore_search')(context, dict(
+ id=resource_id, limit=0))
+ except p.toolkit.ObjectNotFound:
+ return False
+ return response or {'fields': []}
diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py
index 198de320..5a56322c 100644
--- a/ckanext/xloader/views.py
+++ b/ckanext/xloader/views.py
@@ -1,4 +1,4 @@
-from flask import Blueprint
+from flask import Blueprint, request
import ckanext.xloader.utils as utils
@@ -12,4 +12,12 @@ def get_blueprints():
@xloader.route("/dataset/<id>/resource_data/<resource_id>", methods=("GET", "POST"))
def resource_data(id, resource_id):
- return utils.resource_data(id, resource_id)
+ rows = request.args.get('rows')
+ if rows:
+ try:
+ rows = int(rows)
+ if rows < 0:
+ rows = None
+ except ValueError:
+ rows = None
+ return utils.resource_data(id, resource_id, rows)
From 12d1445f62172749b90a7ce0063f6109b764c9dc Mon Sep 17 00:00:00 2001
From: William Dutton
Date: Wed, 1 Nov 2023 10:32:33 +1000
Subject: [PATCH 2/5] file import improvements
* [QOLDEV-347] apply 'str' fallback type correctly
* [QOLDEV-347] fix validation errors on empty strings
* [QOLDEV-424] set default CSV sample size in config to match previous product 1000 lines
* [QOLDEV-424] handle parsing CSV file with commas inside quotes better
---
ckanext/xloader/loader.py | 54 +++++--
ckanext/xloader/parser.py | 210 ++++++++-------------------
ckanext/xloader/tests/test_parser.py | 10 +-
3 files changed, 106 insertions(+), 168 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 46c38dec..aabaefbe 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -10,14 +10,13 @@
import psycopg2
from six.moves import zip
-from tabulator import Stream, TabulatorException
+from tabulator import config as tabulator_config, EncodingError, Stream, TabulatorException
from unidecode import unidecode
import ckan.plugins as p
-import ckan.plugins.toolkit as tk
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
-from .parser import XloaderCSVParser
+from .parser import CSV_SAMPLE_LINES, TypeConverter
from .utils import datastore_resource_exists, headers_guess, type_guess
from ckan.plugins.toolkit import config
@@ -29,6 +28,34 @@
_drop_indexes = datastore_db._drop_indexes
MAX_COLUMN_LENGTH = 63
+tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
+
+
+class UnknownEncodingStream(object):
+ """ Provides a context manager that wraps a Tabulator stream
+ and tries multiple encodings if one fails.
+
+ This is particularly relevant in cases like Latin-1 encoding,
+ which is usually ASCII and thus the sample could be sniffed as UTF-8,
+ only to run into problems later in the file.
+ """
+
+ def __init__(self, filepath, file_format, **kwargs):
+ self.filepath = filepath
+ self.file_format = file_format
+ self.stream_args = kwargs
+
+ def __enter__(self):
+ try:
+ self.stream = Stream(self.filepath, format=self.file_format,
+ **self.stream_args).__enter__()
+ except (EncodingError, UnicodeDecodeError):
+ self.stream = Stream(self.filepath, format=self.file_format,
+ encoding='latin1', **self.stream_args).__enter__()
+ return self.stream
+
+ def __exit__(self, *args):
+ return self.stream.__exit__(*args)
def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
@@ -37,12 +64,12 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with Stream(csv_filepath, format=file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with Stream(csv_filepath, format=file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -73,7 +100,8 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Ensuring character coding is UTF8')
f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
try:
- with Stream(csv_filepath, format=file_format, skip_rows=skip_rows) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format,
+ skip_rows=skip_rows) as stream:
stream.save(target=f_write.name, format='csv', encoding='utf-8',
delimiter=delimiter)
csv_filepath = f_write.name
@@ -237,14 +265,14 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Determining column names and types')
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
- with Stream(table_filepath, format=file_format,
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ with UnknownEncodingStream(table_filepath, file_format,
+ post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with Stream(table_filepath, format=file_format,
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ with UnknownEncodingStream(table_filepath, file_format,
+ post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -279,9 +307,11 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
for t, h in zip(types, headers)]
headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
+ type_converter = TypeConverter(types=types)
- with Stream(table_filepath, format=file_format, skip_rows=skip_rows,
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ with UnknownEncodingStream(table_filepath, file_format,
+ skip_rows=skip_rows,
+ post_parse=[type_converter.convert_types]) as stream:
def row_iterator():
for row in stream:
data_row = {}
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index b2a6f889..812ccd1f 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -1,163 +1,71 @@
# -*- coding: utf-8 -*-
-import csv
-from codecs import iterencode
+import datetime
from decimal import Decimal, InvalidOperation
-from itertools import chain
-
+import re
import six
-from ckan.plugins.toolkit import asbool
-from dateutil.parser import isoparser, parser
-from dateutil.parser import ParserError
-from tabulator import helpers
-from tabulator.parser import Parser
+from ckan.plugins.toolkit import asbool
+from dateutil.parser import isoparser, parser, ParserError
from ckan.plugins.toolkit import config
-CSV_SAMPLE_LINES = 100
+CSV_SAMPLE_LINES = 1000
+DATE_REGEX = re.compile(r'''^\d{1,4}[-/.\s]\S+[-/.\s]\S+''')
-class XloaderCSVParser(Parser):
- """Extends tabulator CSVParser to detect datetime and numeric values.
+class TypeConverter:
+ """ Post-process table cells to convert strings into numbers and timestamps
+ as desired.
"""
- # Public
-
- options = [
- 'delimiter',
- 'doublequote',
- 'escapechar',
- 'quotechar',
- 'quoting',
- 'skipinitialspace',
- 'lineterminator'
- ]
-
- def __init__(self, loader, force_parse=False, **options):
- super(XloaderCSVParser, self).__init__(loader, force_parse, **options)
- # Set attributes
- self.__loader = loader
- self.__options = options
- self.__force_parse = force_parse
- self.__extended_rows = None
- self.__encoding = None
- self.__dialect = None
- self.__chars = None
-
- @property
- def closed(self):
- return self.__chars is None or self.__chars.closed
-
- def open(self, source, encoding=None):
- # Close the character stream, if necessary, before reloading it.
- self.close()
- self.__chars = self.__loader.load(source, encoding=encoding)
- self.__encoding = getattr(self.__chars, 'encoding', encoding)
- if self.__encoding:
- self.__encoding.lower()
- self.reset()
-
- def close(self):
- if not self.closed:
- self.__chars.close()
-
- def reset(self):
- helpers.reset_stream(self.__chars)
- self.__extended_rows = self.__iter_extended_rows()
-
- @property
- def encoding(self):
- return self.__encoding
-
- @property
- def dialect(self):
- if self.__dialect:
- dialect = {
- 'delimiter': self.__dialect.delimiter,
- 'doubleQuote': self.__dialect.doublequote,
- 'lineTerminator': self.__dialect.lineterminator,
- 'quoteChar': self.__dialect.quotechar,
- 'skipInitialSpace': self.__dialect.skipinitialspace,
- }
- if self.__dialect.escapechar is not None:
- dialect['escapeChar'] = self.__dialect.escapechar
- return dialect
-
- @property
- def extended_rows(self):
- return self.__extended_rows
-
- # Private
-
- def __iter_extended_rows(self):
-
- def type_value(value):
- """Returns numeric values as Decimal(). Uses dateutil to parse
- date values. Otherwise, returns values as it receives them
- (strings).
- """
- if value in ('', None):
- return ''
-
- try:
- return Decimal(value)
- except InvalidOperation:
- pass
-
- try:
- i = isoparser()
- return i.isoparse(value)
- except ValueError:
- pass
-
- try:
- p = parser()
- yearfirst = asbool(config.get(
- 'ckanext.xloader.parse_dates_yearfirst', False))
- dayfirst = asbool(config.get(
- 'ckanext.xloader.parse_dates_dayfirst', False))
- return p.parse(value, yearfirst=yearfirst, dayfirst=dayfirst)
- except ParserError:
- pass
-
- return value
-
- sample, dialect = self.__prepare_dialect(self.__chars)
- items = csv.reader(chain(sample, self.__chars), dialect=dialect)
- for row_number, item in enumerate(items, start=1):
- values = []
- for value in item:
- value = type_value(value)
- values.append(value)
- yield row_number, None, list(values)
-
- def __prepare_dialect(self, stream):
-
- # Get sample
- sample = []
- while True:
- try:
- sample.append(next(stream))
- except StopIteration:
- break
- if len(sample) >= CSV_SAMPLE_LINES:
- break
-
- # Get dialect
+ def __init__(self, types=None):
+ self.types = types
+
+ def convert_types(self, extended_rows):
+ """ Try converting cells to numbers or timestamps if applicable.
+ If a list of types was supplied, use that.
+ If not, then try converting each column to numeric first,
+ then to a timestamp. If both fail, just keep it as a string.
+ """
+ for row_number, headers, row in extended_rows:
+ for cell_index, cell_value in enumerate(row):
+ if cell_value is None:
+ row[cell_index] = ''
+ if not cell_value:
+ continue
+ cell_type = self.types[cell_index] if self.types else None
+ if cell_type in [Decimal, None]:
+ converted_value = to_number(cell_value)
+ if converted_value:
+ row[cell_index] = converted_value
+ continue
+ if cell_type in [datetime.datetime, None]:
+ converted_value = to_timestamp(cell_value)
+ if converted_value:
+ row[cell_index] = converted_value
+ yield (row_number, headers, row)
+
+
+def to_number(value):
+ if not isinstance(value, six.string_types):
+ return None
+ try:
+ return Decimal(value)
+ except InvalidOperation:
+ return None
+
+
+def to_timestamp(value):
+ if not isinstance(value, six.string_types) or not DATE_REGEX.search(value):
+ return None
+ try:
+ i = isoparser()
+ return i.isoparse(value)
+ except ValueError:
try:
- separator = ''
- delimiter = self.__options.get('delimiter', ',\t;|')
- dialect = csv.Sniffer().sniff(separator.join(sample), delimiter)
- if not dialect.escapechar:
- dialect.doublequote = True
- except csv.Error:
- class dialect(csv.excel):
- pass
- for key, value in self.__options.items():
- setattr(dialect, key, value)
- # https://github.com/frictionlessdata/FrictionlessDarwinCore/issues/1
- if getattr(dialect, 'quotechar', None) == '':
- setattr(dialect, 'quoting', csv.QUOTE_NONE)
-
- self.__dialect = dialect
- return sample, dialect
+ p = parser()
+ yearfirst = asbool(config.get('ckanext.xloader.parse_dates_yearfirst', False))
+ dayfirst = asbool(config.get('ckanext.xloader.parse_dates_dayfirst', False))
+ return p.parse(value, yearfirst=yearfirst, dayfirst=dayfirst)
+ except ParserError:
+ return None
diff --git a/ckanext/xloader/tests/test_parser.py b/ckanext/xloader/tests/test_parser.py
index 67929d9f..ac4047dd 100644
--- a/ckanext/xloader/tests/test_parser.py
+++ b/ckanext/xloader/tests/test_parser.py
@@ -6,7 +6,7 @@
from datetime import datetime
from tabulator import Stream
-from ckanext.xloader.parser import XloaderCSVParser
+from ckanext.xloader.parser import TypeConverter
csv_filepath = os.path.abspath(
os.path.join(os.path.dirname(__file__), "samples", "date_formats.csv")
@@ -16,7 +16,7 @@
class TestParser(object):
def test_simple(self):
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
@@ -49,7 +49,7 @@ def test_simple(self):
def test_dayfirst(self):
print('test_dayfirst')
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
@@ -82,7 +82,7 @@ def test_dayfirst(self):
def test_yearfirst(self):
print('test_yearfirst')
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
@@ -115,7 +115,7 @@ def test_yearfirst(self):
@pytest.mark.ckan_config("ckanext.xloader.parse_dates_yearfirst", True)
def test_yearfirst_dayfirst(self):
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
From f9ef556dd89b88b4e148f17f296bb63fe67e1797 Mon Sep 17 00:00:00 2001
From: William Dutton
Date: Wed, 1 Nov 2023 11:51:35 +1000
Subject: [PATCH 3/5] Use chardet to guess the file encoding and apply it when
 confidence is above 70%
Also fall back to a Windows encoding if all else fails
---
ckanext/xloader/loader.py | 57 +++++++++++++++++++++++++++++----------
requirements.txt | 1 +
2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index aabaefbe..233a46e6 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -9,6 +9,7 @@
from decimal import Decimal
import psycopg2
+from chardet.universaldetector import UniversalDetector
from six.moves import zip
from tabulator import config as tabulator_config, EncodingError, Stream, TabulatorException
from unidecode import unidecode
@@ -30,6 +31,8 @@
MAX_COLUMN_LENGTH = 63
tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
+ISO_8859_ENCODING = 'latin1'
+
class UnknownEncodingStream(object):
""" Provides a context manager that wraps a Tabulator stream
@@ -40,36 +43,55 @@ class UnknownEncodingStream(object):
only to run into problems later in the file.
"""
- def __init__(self, filepath, file_format, **kwargs):
+ def __init__(self, filepath, file_format, decoding_result, **kwargs):
self.filepath = filepath
self.file_format = file_format
self.stream_args = kwargs
+ self.decoding_result = decoding_result # {'encoding': 'EUC-JP', 'confidence': 0.99}
def __enter__(self):
try:
- self.stream = Stream(self.filepath, format=self.file_format,
- **self.stream_args).__enter__()
+
+ if (self.decoding_result and self.decoding_result['confidence'] and self.decoding_result['confidence'] > 0.7):
+ self.stream = Stream(self.filepath, format=self.file_format, encoding=self.decoding_result['encoding'],
+ ** self.stream_args).__enter__()
+ else:
+ self.stream = Stream(self.filepath, format=self.file_format, ** self.stream_args).__enter__()
+
except (EncodingError, UnicodeDecodeError):
self.stream = Stream(self.filepath, format=self.file_format,
- encoding='latin1', **self.stream_args).__enter__()
+ encoding=ISO_8859_ENCODING, **self.stream_args).__enter__()
return self.stream
def __exit__(self, *args):
return self.stream.__exit__(*args)
+def detect_encoding(file_path):
+ detector = UniversalDetector()
+ with open(file_path, 'rb') as file:
+ for line in file:
+ detector.feed(line)
+ if detector.done:
+ break
+ detector.close()
+ return detector.result # e.g. {'encoding': 'EUC-JP', 'confidence': 0.99}
+
+
def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
'''Loads a CSV into DataStore. Does not create the indexes.'''
+ decoding_result = detect_encoding(csv_filepath)
+ logger.info("load_csv: Decoded encoding: %s", decoding_result)
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with UnknownEncodingStream(csv_filepath, file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with UnknownEncodingStream(csv_filepath, file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -100,11 +122,16 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Ensuring character coding is UTF8')
f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
try:
- with UnknownEncodingStream(csv_filepath, file_format,
- skip_rows=skip_rows) as stream:
- stream.save(target=f_write.name, format='csv', encoding='utf-8',
- delimiter=delimiter)
- csv_filepath = f_write.name
+ save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
+ try:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
+ skip_rows=skip_rows) as stream:
+ stream.save(**save_args)
+ except (EncodingError, UnicodeDecodeError):
+ with Stream(csv_filepath, format=file_format, encoding=ISO_8859_ENCODING,
+ skip_rows=skip_rows) as stream:
+ stream.save(**save_args)
+ csv_filepath = f_write.name
# datastore db connection
engine = get_write_engine()
@@ -263,15 +290,17 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
# Determine the header row
logger.info('Determining column names and types')
+ decoding_result = detect_encoding(table_filepath)
+ logger.info("load_table: Decoded encoding: %s", decoding_result)
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
- with UnknownEncodingStream(table_filepath, file_format,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result,
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with UnknownEncodingStream(table_filepath, file_format,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result,
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
@@ -309,7 +338,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
type_converter = TypeConverter(types=types)
- with UnknownEncodingStream(table_filepath, file_format,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result,
skip_rows=skip_rows,
post_parse=[type_converter.convert_types]) as stream:
def row_iterator():
diff --git a/requirements.txt b/requirements.txt
index 58540beb..fe92b6d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ six>=1.12.0
tabulator==1.53.5
Unidecode==1.0.22
python-dateutil>=2.8.2
+chardet==5.2.0
From bb343559c4587921a01bcfa1aaa51228dfbc205a Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Thu, 2 Nov 2023 09:18:30 +1000
Subject: [PATCH 4/5] [QOLSVC-2984] sniff using Windows-1252 encoding rather
than Latin-1
- Windows-1252 is a superset, which makes it more useful for this purpose
---
ckanext/xloader/loader.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 233a46e6..856944e0 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -31,7 +31,7 @@
MAX_COLUMN_LENGTH = 63
tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
-ISO_8859_ENCODING = 'latin1'
+SINGLE_BYTE_ENCODING = 'cp1252'
class UnknownEncodingStream(object):
@@ -60,7 +60,7 @@ def __enter__(self):
except (EncodingError, UnicodeDecodeError):
self.stream = Stream(self.filepath, format=self.file_format,
- encoding=ISO_8859_ENCODING, **self.stream_args).__enter__()
+ encoding=SINGLE_BYTE_ENCODING, **self.stream_args).__enter__()
return self.stream
def __exit__(self, *args):
@@ -128,7 +128,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
skip_rows=skip_rows) as stream:
stream.save(**save_args)
except (EncodingError, UnicodeDecodeError):
- with Stream(csv_filepath, format=file_format, encoding=ISO_8859_ENCODING,
+ with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
skip_rows=skip_rows) as stream:
stream.save(**save_args)
csv_filepath = f_write.name
From 6eb5658b31b11391dd96b89e61a99cad28d05f7f Mon Sep 17 00:00:00 2001
From: antuarc
Date: Fri, 2 Feb 2024 14:45:54 +1000
Subject: [PATCH 5/5] further cleanup
- Recognise 0 as a valid numeric value
- Fix whitespace and unused import
- Extract maximum retry count to a constant
- Use context managers to automatically close streams
- Add README note about configuring PostgreSQL date style
- Add titles to queued jobs so they are more easily administered
---
README.rst | 6 ++++++
ckanext/xloader/action.py | 4 +++-
ckanext/xloader/db.py | 10 +---------
ckanext/xloader/jobs.py | 17 +++++++++--------
ckanext/xloader/parser.py | 4 +++-
ckanext/xloader/plugin.py | 4 ++--
.../templates/xloader/resource_data.html | 2 +-
ckanext/xloader/tests/samples/simple-large.csv | 3 ++-
ckanext/xloader/tests/test_loader.py | 17 +++++++++++++++++
ckanext/xloader/views.py | 2 +-
10 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/README.rst b/README.rst
index 0586f336..95c3015e 100644
--- a/README.rst
+++ b/README.rst
@@ -196,6 +196,12 @@ This setting is shared with other plugins that download resource files, such as
ckan.download_proxy = http://my-proxy:1234/
+You may also wish to configure the database to use your preferred date input style on COPY.
+For example, to make `PostgreSQL <https://www.postgresql.org/docs/current/runtime-config-client.html#RUNTIME-CONFIG-CLIENT-FORMAT>`_
+expect European (day-first) dates, you could add to ``postgresql.conf``:
+
+ datestyle=ISO,DMY
+
------------------------
Developer installation
------------------------
diff --git a/ckanext/xloader/action.py b/ckanext/xloader/action.py
index e45394a9..aabc8148 100644
--- a/ckanext/xloader/action.py
+++ b/ckanext/xloader/action.py
@@ -160,7 +160,9 @@ def xloader_submit(context, data_dict):
try:
job = enqueue_job(
- jobs.xloader_data_into_datastore, [data], rq_kwargs=dict(timeout=timeout)
+ jobs.xloader_data_into_datastore, [data],
+ title="xloader_submit: package: {} resource: {}".format(resource_dict.get('package_id'), res_id),
+ rq_kwargs=dict(timeout=timeout)
)
except Exception:
log.exception('Unable to enqueued xloader res_id=%s', res_id)
diff --git a/ckanext/xloader/db.py b/ckanext/xloader/db.py
index a3078ea4..a93eb0d8 100644
--- a/ckanext/xloader/db.py
+++ b/ckanext/xloader/db.py
@@ -191,9 +191,7 @@ def add_pending_job(job_id, job_type, api_key,
if not metadata:
metadata = {}
- conn = ENGINE.connect()
- trans = conn.begin()
- try:
+ with ENGINE.begin() as conn:
conn.execute(JOBS_TABLE.insert().values(
job_id=job_id,
job_type=job_type,
@@ -225,12 +223,6 @@ def add_pending_job(job_id, job_type, api_key,
)
if inserts:
conn.execute(METADATA_TABLE.insert(), inserts)
- trans.commit()
- except Exception:
- trans.rollback()
- raise
- finally:
- conn.close()
class InvalidErrorObjectError(Exception):
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index be2c57d5..8393c970 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -42,6 +42,7 @@
CHUNK_SIZE = 16 * 1024 # 16kb
DOWNLOAD_TIMEOUT = 30
+MAX_RETRIES = 1
RETRYABLE_ERRORS = (
errors.DeadlockDetected,
errors.LockNotAvailable,
@@ -92,18 +93,21 @@ def xloader_data_into_datastore(input):
db.mark_job_as_errored(job_id, str(e))
job_dict['status'] = 'error'
job_dict['error'] = str(e)
- log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
+ log.error('xloader error: %s, %s', e, traceback.format_exc())
errored = True
except Exception as e:
if isinstance(e, RETRYABLE_ERRORS):
tries = job_dict['metadata'].get('tries', 0)
- if tries == 0:
+ if tries < MAX_RETRIES:
+ tries = tries + 1
log.info("Job %s failed due to temporary error [%s], retrying", job_id, e)
job_dict['status'] = 'pending'
- job_dict['metadata']['tries'] = tries + 1
+ job_dict['metadata']['tries'] = tries
enqueue_job(
xloader_data_into_datastore,
[input],
+ title="retry xloader_data_into_datastore: resource: {} attempt {}".format(
+ job_dict['metadata']['resource_id'], tries),
rq_kwargs=dict(timeout=RETRIED_JOB_TIMEOUT)
)
return None
@@ -112,7 +116,7 @@ def xloader_data_into_datastore(input):
job_id, traceback.format_tb(sys.exc_info()[2])[-1] + repr(e))
job_dict['status'] = 'error'
job_dict['error'] = str(e)
- log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
+ log.error('xloader error: %s, %s', e, traceback.format_exc())
errored = True
finally:
# job_dict is defined in xloader_hook's docstring
@@ -562,8 +566,7 @@ def __init__(self, task_id, input):
self.input = input
def emit(self, record):
- conn = db.ENGINE.connect()
- try:
+ with db.ENGINE.connect() as conn:
# Turn strings into unicode to stop SQLAlchemy
# "Unicode type received non-unicode bind param value" warnings.
message = str(record.getMessage())
@@ -579,8 +582,6 @@ def emit(self, record):
module=module,
funcName=funcName,
lineno=record.lineno))
- finally:
- conn.close()
class DatetimeJsonEncoder(json.JSONEncoder):
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index 812ccd1f..11e756cd 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -36,7 +36,9 @@ def convert_types(self, extended_rows):
cell_type = self.types[cell_index] if self.types else None
if cell_type in [Decimal, None]:
converted_value = to_number(cell_value)
- if converted_value:
+ # Can't do a simple truthiness check,
+ # because 0 is a valid numeric result.
+ if converted_value is not None:
row[cell_index] = converted_value
continue
if cell_type in [datetime.datetime, None]:
diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index 392b1cf5..6e65e466 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -78,8 +78,8 @@ def notify(self, entity, operation):
See: ckan/model/modification.py.DomainObjectModificationExtension
"""
if operation != DomainObjectOperation.changed \
- or not isinstance(entity, Resource) \
- or not getattr(entity, 'url_changed', False):
+ or not isinstance(entity, Resource) \
+ or not getattr(entity, 'url_changed', False):
return
context = {
"ignore_auth": True,
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index 74a5f715..98027508 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -23,7 +23,7 @@
{% set delete_action = h.url_for('xloader.delete_datastore_table', id=pkg.id, resource_id=res.id) %}