# Post-change excerpt of akvo/cordaid_org_importer.py, reconstructed from the
# patch 40aa75a228..ea432313e5.
#
# NOTE(review): the patch only shows part of the file.  The file header, the
# imports that bring ``File``, ``NamedTemporaryFile`` and
# ``IATI_LIST_ORGANISATION_TYPE`` into scope, the body of ``normalize_url`` and
# the body of the ``__main__`` guard lie outside the visible hunks and are
# therefore not reproduced here.

import io
import os
from os.path import splitext
import sys

from lxml import etree

from akvo.rsr.models import InternalOrganisationID, Organisation, PartnerType
from akvo.rsr.utils import model_and_instance_based_filename

CORDAID_DIR = "/var/tmp/cordaid"
#CORDAID_DIR = "/Users/gabriel/git/akvo-rsr/akvo/api/xml/cordaid"
CORDAID_XML_FILE = os.path.join(CORDAID_DIR, "org_import.xml")
#CORDAID_XML_FILE = os.path.join(CORDAID_DIR, "cordaid_orgs_from_live.xml")
CORDAID_LOGOS_DIR = os.path.join(CORDAID_DIR, "org_logos")
CORDAID_ORG_ID = 273
LOG_FILE = os.path.join(CORDAID_DIR, "org_import_log.txt")

# File extensions (lower-cased) that import_images treats as logos.
LOGO_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif")


def log_to_file(text, log_file=LOG_FILE):
    """Append ``text`` plus a newline to ``log_file`` and echo it to stdout.

    Args:
        text: The message to record.
        log_file: Path of the log file; it is opened in append mode.
    """
    line = u"{text}\n".format(text=text)
    # io.open with an explicit encoding accepts unicode text on both
    # Python 2 and 3; the builtin open() on Python 2 would try to encode the
    # line as ASCII and fail on non-ASCII organisation data.
    with io.open(log_file, "a", encoding="utf-8") as handle:
        handle.write(line)
    sys.stdout.write(line)


def run_organisation_import_report(data):
    """Log the summary of an organisation import.

    Args:
        data: Mapping with the integer counters "updated", "created" and
            "failed".
    """
    log_to_file("Totals")
    log_to_file("======")
    log_to_file("Organisations updated: {updated}".format(updated=data["updated"]))
    log_to_file("Organisations created: {created}".format(created=data["created"]))
    log_to_file("TOTAL imported: {total}".format(
        total=(data["created"] + data["updated"])
    ))
    log_to_file("Errors")
    log_to_file("======")
    log_to_file("{failed} organisations could not be imported".format(
        failed=data["failed"]
    ))


def run_logo_import_report(data):
    """Log the summary of a logo import.

    Args:
        data: Mapping with the integer counters "succeeded" and "failed".
    """
    log_to_file("Totals")
    log_to_file("======")
    log_to_file("Logos successfully uploaded: {succeeded}".format(
        succeeded=data["succeeded"]
    ))
    log_to_file("Logos which failed to upload: {failed}".format(
        failed=data["failed"]
    ))


def get_organisation_type(new_organisation_type):
    """Map an IATI organisation type code to the legacy organisation type.

    The mapping pairs the codes of IATI_LIST_ORGANISATION_TYPE, in order,
    with the entries of Organisation.NEW_TO_OLD_TYPES.

    Raises:
        KeyError: If ``new_organisation_type`` is not a known IATI code.
    """
    # Avoid naming the loop variable ``type``: that shadows the builtin.
    codes = [code for code, _name in IATI_LIST_ORGANISATION_TYPE]
    types = dict(zip(codes, Organisation.NEW_TO_OLD_TYPES))
    return types[new_organisation_type]


# NOTE(review): ``normalize_url(url)`` is defined at this point in the full
# file; its body is outside the visible patch hunks.


def _create_organisation(recording_org, identifier):
    """Create a blank Organisation and its InternalOrganisationID.

    Returns:
        A ``(referenced_org, internal_org_id)`` tuple.

    Raises:
        Exception: Whatever the ORM raised.  Records saved before the failure
            are deleted first so no half-created organisation is left behind.
    """
    referenced_org = Organisation()
    referenced_org.save()
    internal_org_id = None
    try:
        internal_org_id = InternalOrganisationID(
            recording_org=recording_org,
            referenced_org=referenced_org,
            identifier=identifier,
        )
        internal_org_id.save()
        for partner_type in PartnerType.objects.all():
            referenced_org.partner_types.add(partner_type)
    except Exception:
        # Only delete what this call actually managed to save.
        if internal_org_id is not None and internal_org_id.pk is not None:
            internal_org_id.delete()
        referenced_org.delete()
        raise
    return referenced_org, internal_org_id


def _get_or_create_organisation(recording_org, identifier):
    """Find the organisation Cordaid knows as ``identifier``, or create it.

    Returns:
        A ``(referenced_org, internal_org_id, action)`` tuple where ``action``
        is "updated" for an existing organisation and "created" for a new one.
    """
    try:
        internal_org_id = InternalOrganisationID.objects.get(
            recording_org=recording_org,
            identifier=identifier,
        )
    except InternalOrganisationID.DoesNotExist:
        referenced_org, internal_org_id = _create_organisation(
            recording_org, identifier
        )
        return referenced_org, internal_org_id, "created"
    return internal_org_id.referenced_org, internal_org_id, "updated"


def import_orgs(xml_file):
    """Create or update one Organisation per child element of ``xml_file``.

    Each element supplies "org_id", "name", "description", "url" and
    "iati_organisation_type".  Every outcome is written through log_to_file
    and a summary is logged at the end.

    An element whose organisation can be neither found nor created is counted
    as failed and skipped.  Errors while reading the element's fields or
    saving the organisation are not caught and abort the import.

    Args:
        xml_file: Path of the XML document to import.
    """
    report_data = dict(created=0, failed=0, updated=0)
    # Read the document up front so the file is not held open during the
    # database work.
    with open(xml_file, "rb") as f:
        root = etree.fromstring(f.read())
    # The recording organisation is the same for every element.
    recording_org = Organisation.objects.get(id=CORDAID_ORG_ID)
    for element in root:
        identifier = element.findtext("org_id")
        try:
            referenced_org, internal_org_id, action = (
                _get_or_create_organisation(recording_org, identifier)
            )
        except Exception as e:
            report_data["failed"] += 1
            log_to_file(
                u"*** UNABLE TO FIND OR CREATE ORGANISATION "
                u"(Cordaid ID: {cordaid_org_id})! Reason: {message}.".format(
                    cordaid_org_id=identifier, message=e
                )
            )
            # There is no organisation to populate for this element.
            continue
        name = element.findtext("name")
        referenced_org.name, referenced_org.long_name = name[:25], name
        referenced_org.description = element.findtext("description") or "N/A"
        referenced_org.url = normalize_url(element.findtext("url"))
        referenced_org.new_organisation_type = int(
            element.findtext("iati_organisation_type")
        )
        referenced_org.organisation_type = get_organisation_type(
            referenced_org.new_organisation_type
        )
        referenced_org.save()
        # Count only once the organisation has really been saved.
        report_data[action] += 1
        log_to_file(
            u"{action} organisation {org_id} "
            u"(Cordaid ID: {cordaid_org_id})".format(
                action=action.upper(),
                cordaid_org_id=internal_org_id.identifier,
                org_id=referenced_org.id,
            )
        )
    run_organisation_import_report(report_data)


def import_images(logo_dir):
    """Attach each logo in ``logo_dir`` to the organisation it belongs to.

    A file is considered when its extension is one of LOGO_EXTENSIONS; the
    file name without the extension is used as the Cordaid identifier of the
    organisation.  Every outcome is written through log_to_file and a summary
    is logged at the end.  A failing logo is logged and counted, and does not
    stop the remaining uploads.

    Args:
        logo_dir: Directory containing the logo files.
    """
    report_data = dict(failed=0, succeeded=0)
    for logo_name in os.listdir(logo_dir):
        identifier, extension = splitext(logo_name)
        if extension.lower() not in LOGO_EXTENSIONS:
            continue
        # Reset per file so a failed lookup never reports the previous
        # iteration's organisation.
        org = None
        error_message = u""
        try:
            internal_org_id = InternalOrganisationID.objects.get(
                recording_org=Organisation.objects.get(id=CORDAID_ORG_ID),
                identifier=identifier,
            )
            org = internal_org_id.referenced_org
            filename = model_and_instance_based_filename(
                "Organisation", org.pk, "logo", logo_name
            )
            with open(os.path.join(logo_dir, logo_name), "rb") as f:
                logo_data = f.read()
            logo_tmp = NamedTemporaryFile(delete=True)
            try:
                logo_tmp.write(logo_data)
                logo_tmp.flush()
                org.logo.save(filename, File(logo_tmp), save=True)
            finally:
                # Release the temporary file whether or not the save worked.
                logo_tmp.close()
            action = "succeeded"
        except Exception as e:
            action = "failed"
            error_message = (
                u"with the following error message: {error_message}".format(
                    error_message=e
                )
            )
        report_data[action] += 1
        log_to_file(
            u"Upload of image to organisation {org_id} "
            u"(Cordaid ID: {cordaid_org_id}) {action} {error_message}".format(
                org_id=org.id if org is not None else u"unknown",
                cordaid_org_id=identifier,
                action=action,
                error_message=error_message,
            ).rstrip()
        )
    run_logo_import_report(report_data)


# NOTE(review): the ``if __name__ == "__main__":`` entry point follows here in
# the full file; its body is outside the visible patch hunks.