Skip to content

Commit

Permalink
checking references duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Perfilov committed Dec 27, 2016
1 parent 3474f10 commit 385f5a2
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion ckanext/datajson/datajsonvalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,14 @@
import os

# Set of "<Agency Code>:<Bureau Code>" strings, filled from the CSV file
# stored in the "resources" directory next to this module.
omb_burueau_codes = set()

# Alternative loader, kept for reference: reads the same columns from the
# remote copy of the CSV instead of the local file.
# for row in csv.DictReader(urllib.urlopen("https://project-open-data.cio.gov/data/omb_bureau_codes.csv")):
#     omb_burueau_codes.add(row["Agency Code"] + ":" + row["Bureau Code"])

_bureau_codes_csv = os.path.join(
    os.path.dirname(__file__), "resources", "omb_bureau_codes.csv"
)
with open(_bureau_codes_csv, "r") as codes_file:
    # One entry per CSV row, joining the two code columns with a colon.
    omb_burueau_codes.update(
        record["Agency Code"] + ":" + record["Bureau Code"]
        for record in csv.DictReader(codes_file)
    )


# main function for validation
def do_validation(doc, errors_array, seen_identifiers):
errs = {}
Expand Down Expand Up @@ -376,6 +377,10 @@ def do_validation(doc, errors_array, seen_identifiers):
add_error(errs, 50, "Invalid Field Value (Optional Fields)",
"The field 'references' had an invalid rfc3987 URL: \"%s\"" % s, dataset_name)

if len(item["references"]) != len(set(item["references"])):
add_error(errs, 50, "Invalid Field Value (Optional Fields)",
"The field 'references' has duplicates", dataset_name)

# systemOfRecords # optional
check_url_field(False, item, "systemOfRecords", dataset_name, errs, allow_redacted=True)

Expand Down

0 comments on commit 385f5a2

Please sign in to comment.