diff --git a/designsafe/apps/api/projects_v2/operations/datacite_operations.py b/designsafe/apps/api/projects_v2/operations/datacite_operations.py
index 031868102..cdb781023 100644
--- a/designsafe/apps/api/projects_v2/operations/datacite_operations.py
+++ b/designsafe/apps/api/projects_v2/operations/datacite_operations.py
@@ -247,3 +247,15 @@ def hide_datacite_doi(doi: str):
         timeout=30,
     )
     return res.json()
+
+
+def get_doi_publication_date(doi: str) -> str:
+    """
+    Look up the publication date for a DOI.
+
+    Returns the `created` attribute of the DOI's DataCite record. Raises
+    `requests.HTTPError` when DataCite answers with an error status.
+    """
+    res = requests.get(f"{settings.DATACITE_URL.strip('/')}/dois/{doi}", timeout=30)
+    res.raise_for_status()
+    return res.json()["data"]["attributes"]["created"]
diff --git a/designsafe/apps/api/projects_v2/operations/project_publish_operations.py b/designsafe/apps/api/projects_v2/operations/project_publish_operations.py
index 13ac47df4..ea001db0c 100644
--- a/designsafe/apps/api/projects_v2/operations/project_publish_operations.py
+++ b/designsafe/apps/api/projects_v2/operations/project_publish_operations.py
@@ -8,6 +8,7 @@
 import datetime
 from pathlib import Path
 import logging
+import requests
 from django.conf import settings
 from django.db import close_old_connections
 import networkx as nx
@@ -19,6 +20,7 @@
     get_datacite_json,
     publish_datacite_doi,
     upsert_datacite_json,
+    get_doi_publication_date,
 )
 from designsafe.apps.api.projects_v2.operations.project_archive_operations import (
     archive_publication_async,
@@ -319,21 +321,32 @@ def get_publication_subtree(
     return subtree, path_mapping
 
 
-def fix_publication_dates(existing_tree: nx.DiGraph, incoming_tree: nx.DiGraph):
-    """
-    Update publication date on versioned pubs to match the initial publication date.
-    """
-    initial_pub_dates = {}
-    for published_entity in existing_tree.successors("NODE_ROOT"):
-        published_uuid = existing_tree.nodes[published_entity]["uuid"]
-        initial_pub_dates[published_uuid] = existing_tree.nodes[published_entity][
-            "publicationDate"
-        ]
-    for node in incoming_tree:
-        if incoming_tree.nodes[node]["uuid"] in initial_pub_dates:
-            incoming_tree.nodes[node]["publicationDate"] = initial_pub_dates[
-                incoming_tree.nodes[node]["uuid"]
-            ]
+def fix_publication_dates(incoming_tree: nx.DiGraph):
+    """
+    Update publication date on versioned/amended pubs to match the initial
+    publication date.
+
+    For each successor of NODE_ROOT whose value lists a DOI, the first DOI's
+    DataCite creation timestamp replaces the node's publicationDate. A failed
+    lookup is logged and that node is left unchanged. The tree is modified in
+    place and returned.
+    """
+    for incoming_node in incoming_tree.successors("NODE_ROOT"):
+        node_data = incoming_tree.nodes[incoming_node]
+        existing_doi = next(iter(node_data["value"].get("dois") or []), None)
+        if not existing_doi:
+            continue
+        try:
+            existing_pub_date = get_doi_publication_date(existing_doi)
+            # A trailing "Z" is only accepted by fromisoformat() on Python 3.11+,
+            # so spell the UTC offset out before parsing.
+            node_data["publicationDate"] = datetime.datetime.fromisoformat(
+                existing_pub_date.replace("Z", "+00:00")
+            ).isoformat()
+        except (requests.RequestException, KeyError, ValueError):
+            # Best effort: network errors and malformed responses are logged
+            # like HTTP errors rather than propagating to the caller.
+            logger.exception("Datacite lookup error for DOI %s", existing_doi)
     return incoming_tree
 
 
@@ -356,12 +369,13 @@ def get_publication_full_tree(
 
     full_tree = nx.compose_all(subtrees)
 
+    # Update publication date on versioned/amended pubs to match the initial publication date.
+    full_tree = fix_publication_dates(full_tree)
+
     if version and version > 1:
         existing_pub = Publication.objects.get(project_id=project_id)
         published_tree: nx.DiGraph = nx.node_link_graph(existing_pub.tree)
 
-        # Update publication date on versioned pubs to match the initial publication date.
-        full_tree = fix_publication_dates(published_tree, full_tree)
         full_tree = nx.compose(published_tree, full_tree)
 
     return full_tree, full_path_mapping