Skip to content

Commit

Permalink
Ensure correct publication dates after subsequent publish (#1436)
Browse files Browse the repository at this point in the history
  • Loading branch information
jarosenb authored Sep 12, 2024
1 parent 83a6b8c commit 2274548
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,10 @@ def hide_datacite_doi(doi: str):
timeout=30,
)
return res.json()


def get_doi_publication_date(doi: str) -> str:
    """
    Look up the publication date for a DOI.

    Issues a GET request (30 second timeout) for ``/dois/{doi}`` under
    ``settings.DATACITE_URL`` and returns the ``created`` attribute found at
    ``data.attributes`` in the JSON response.

    Raises ``requests.HTTPError`` (via ``raise_for_status``) when the response
    carries an error status.
    """
    base_url = settings.DATACITE_URL.strip("/")
    response = requests.get(f"{base_url}/dois/{doi}", timeout=30)
    response.raise_for_status()
    attributes = response.json()["data"]["attributes"]
    return attributes["created"]
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import datetime
from pathlib import Path
import logging
import requests
from django.conf import settings
from django.db import close_old_connections
import networkx as nx
Expand All @@ -19,6 +20,7 @@
get_datacite_json,
publish_datacite_doi,
upsert_datacite_json,
get_doi_publication_date,
)
from designsafe.apps.api.projects_v2.operations.project_archive_operations import (
archive_publication_async,
Expand Down Expand Up @@ -319,21 +321,22 @@ def get_publication_subtree(
return subtree, path_mapping


def fix_publication_dates(incoming_tree: nx.DiGraph):
    """
    Update publication dates on entities that already have a DOI so that they
    match the date recorded for that DOI.

    For every direct successor of "NODE_ROOT" whose ``value`` lists at least one
    DOI, the first DOI is looked up with ``get_doi_publication_date`` and the
    node's "publicationDate" is overwritten with the result, re-serialized via
    ``datetime.isoformat()``. Nodes without a DOI are left untouched.

    The lookup is best-effort: if the request fails or the returned date cannot
    be parsed, the error is logged and the node keeps its current date.

    The tree is modified in place and is also returned.
    """

    for incoming_node in incoming_tree.successors("NODE_ROOT"):
        node_data = incoming_tree.nodes[incoming_node]
        # `or []` also covers a "dois" key that is present but null/empty.
        existing_doi = next(iter(node_data["value"].get("dois") or []), None)
        if not existing_doi:
            continue

        try:
            existing_pub_date = get_doi_publication_date(existing_doi)
            # datetime.fromisoformat only accepts a trailing "Z" from
            # Python 3.11 onwards; spell the UTC offset out so that older
            # interpreters parse it to the same value.
            if existing_pub_date.endswith("Z"):
                existing_pub_date = f"{existing_pub_date[:-1]}+00:00"
            normalized_pub_date = datetime.datetime.fromisoformat(
                existing_pub_date
            ).isoformat()
        except requests.RequestException:
            # RequestException is the base of HTTPError, Timeout and
            # ConnectionError, so a network failure no longer aborts the
            # whole tree build.
            logger.error("Datacite lookup error for DOI %s", existing_doi)
        except (KeyError, ValueError):
            # Response lacked the expected attribute or held an unparseable date.
            logger.error("Unexpected Datacite date for DOI %s", existing_doi)
        else:
            node_data["publicationDate"] = normalized_pub_date

    return incoming_tree

Expand All @@ -356,12 +359,13 @@ def get_publication_full_tree(

full_tree = nx.compose_all(subtrees)

# Update publication date on versioned/amended pubs to match the initial publication date.
full_tree = fix_publication_dates(full_tree)

if version and version > 1:
existing_pub = Publication.objects.get(project_id=project_id)
published_tree: nx.DiGraph = nx.node_link_graph(existing_pub.tree)

# Update publication date on versioned pubs to match the initial publication date.
full_tree = fix_publication_dates(published_tree, full_tree)
full_tree = nx.compose(published_tree, full_tree)

return full_tree, full_path_mapping
Expand Down

0 comments on commit 2274548

Please sign in to comment.