Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/lxl 4610 allow dead links in bulk change spec #1552

Merged
merged 3 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions rest/src/main/groovy/whelk/rest/api/Crud.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import javax.servlet.http.HttpServletRequest
import javax.servlet.http.HttpServletResponse
import java.lang.management.ManagementFactory


import static whelk.rest.api.CrudUtils.ETag
import static whelk.util.http.HttpTools.getBaseUri
import static whelk.util.http.HttpTools.sendResponse
Expand Down Expand Up @@ -841,16 +842,10 @@ class Crud extends HttpServlet {
} else if (doc && doc.deleted) {
throw new OtherStatusException("Document has been deleted.", HttpServletResponse.SC_GONE)
} else {
def referencedBy = whelk.storage.followDependers(doc.getShortId(), JsonLd.ALLOW_LINK_TO_DELETED + jsonld.cascadingDeleteRelations())
if (!referencedBy.isEmpty()) {
def referencedByStr = referencedBy.collect { shortId, path -> "$shortId at $path" }.join(', ')
throw new OtherStatusException("This record may not be deleted, because it is referenced by other records: " + referencedByStr, HttpServletResponse.SC_FORBIDDEN)
} else {
log.debug("Removing resource at ${doc.getShortId()}")
String activeSigel = request.getHeader(XL_ACTIVE_SIGEL_HEADER)
whelk.remove(doc.getShortId(), "xl", activeSigel)
response.setStatus(HttpServletResponse.SC_NO_CONTENT)
}
log.debug("Removing resource at ${doc.getShortId()}")
String activeSigel = request.getHeader(XL_ACTIVE_SIGEL_HEADER)
whelk.remove(doc.getShortId(), "xl", activeSigel)
response.setStatus(HttpServletResponse.SC_NO_CONTENT)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,11 @@ class HttpTools {
switch(e) {
case BadRequestException:
case ModelValidationException:
case LinkValidationException:
return HttpServletResponse.SC_BAD_REQUEST

case LinkValidationException:
return HttpServletResponse.SC_FORBIDDEN

case NotFoundException:
return HttpServletResponse.SC_NOT_FOUND

Expand Down
29 changes: 21 additions & 8 deletions whelk-core/src/main/groovy/whelk/JsonLd.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,26 @@ class JsonLd {

public static final List<String> NON_DEPENDANT_RELATIONS = ['narrower', 'broader', 'expressionOf', 'related',
'derivedFrom']
public static final List<String> ALLOW_LINK_TO_DELETED = [
'meta.derivedFrom', 'hasTitle.source', 'bulk:changeSpec.bulk:deprecate',
/* following are combinations only needed while there are local unlinked works */
'translationOf.hasTitle.source', 'instanceOf.hasTitle.source', 'instanceOf.translationOf.hasTitle.source']

// The following relations may hold links to deleted resources.
// In general we don't allow dead links within XL so don't add to this list without good reason.
// Entries are matched by isWeak(): either exactly, or, when an entry ends with '*',
// as a prefix (the part before the '*' must be the start of the relation).
private static final List<String> WEAK_RELATIONS = [
'concerning',
'meta.derivedFrom',
'hasTitle.source',
'bulk:changeSpec.*',
/* following are combinations only needed while there are local unlinked works */
'translationOf.hasTitle.source',
'instanceOf.hasTitle.source',
'instanceOf.translationOf.hasTitle.source'
]

/** Category names, used as the argument to getCategoryMembers(). */
static final class Category {
public static final String DEPENDENT = 'dependent'
public static final String INTEGRAL = 'integral'
}


public static final Set<String> LD_KEYS

static {
Expand Down Expand Up @@ -91,6 +101,7 @@ class JsonLd {
public static final String INVERSE_OF = "inverseOf"
public static final String EQUIVALENT_CLASS = "equivalentClass"
public static final String EQUIVALENT_PROPERTY = "equivalentProperty"
public static final String SAME_AS = "sameAs"
}

static final class Rdfs {
Expand Down Expand Up @@ -404,6 +415,12 @@ class JsonLd {
jsonLd.size() == 1 && jsonLd[ID_KEY]
}

/**
 * Tells whether the given relation is listed in WEAK_RELATIONS.
 *
 * An entry matches when it is equal to the relation, or when the entry ends
 * with '*' and the relation starts with the entry minus that trailing '*'.
 *
 * @param relation the relation (property path) to check
 * @return true if any entry of WEAK_RELATIONS matches, otherwise false
 */
static boolean isWeak(String relation) {
    for (String pattern in WEAK_RELATIONS) {
        if (pattern == relation) {
            return true
        }
        if (pattern.endsWith("*")) {
            // Wildcard entry: compare against everything before the trailing '*'.
            String prefix = pattern.substring(0, pattern.length() - 1)
            if (relation.startsWith(prefix)) {
                return true
            }
        }
    }
    return false
}

static URI findRecordURI(Map jsonLd) {
String foundIdentifier = findIdentifier(jsonLd)
if (foundIdentifier) {
Expand Down Expand Up @@ -702,10 +719,6 @@ class JsonLd {
return categories.get(category, Collections.EMPTY_SET)
}

/**
 * @return the members of the Category.DEPENDENT category, as given by getCategoryMembers()
 */
Set<String> cascadingDeleteRelations() {
    return getCategoryMembers(Category.DEPENDENT)
}

/**
 * @param property the property name to look up
 * @return true if the property is a member of the Category.INTEGRAL category
 */
boolean isIntegral(String property) {
    def integralMembers = getCategoryMembers(Category.INTEGRAL)
    return integralMembers.contains(property)
}
Expand Down
14 changes: 13 additions & 1 deletion whelk-core/src/main/groovy/whelk/Whelk.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import java.time.Instant
import java.time.ZoneId

import static whelk.FeatureFlags.Flag.INDEX_BLANK_WORKS
import static whelk.exception.LinkValidationException.IncomingLinksException

/**
* The Whelk is the root component of the XL system.
Expand Down Expand Up @@ -556,7 +557,10 @@ class Whelk {
log.warn "Could not remove object from whelk. No entry with id $id found"
}
if (doc) {
storage.remove(id, changedIn, changedBy, force)
if (!force) {
assertNoDependers(doc)
}
storage.remove(id, changedIn, changedBy)
indexAsyncOrSync {
elastic.remove(id)
if (features.isEnabled(INDEX_BLANK_WORKS)) {
Expand All @@ -569,6 +573,14 @@ class Whelk {
}
}

/**
 * Verifies that the given document has no incoming links through a non-weak relation.
 *
 * The incoming links are fetched per relation from storage; any relation for which
 * JsonLd.isWeak() returns false causes the check to fail.
 *
 * @param doc the document to check
 * @throws IncomingLinksException if at least one incoming relation is not weak
 */
private void assertNoDependers(Document doc) {
    def incomingByRelation = storage.getIncomingLinkCountByIdAndRelation(doc.getShortId())
    // The link count itself is irrelevant here, only which relations occur.
    if (incomingByRelation.any { relation, count -> !JsonLd.isWeak(relation) }) {
        throw new IncomingLinksException("Record is referenced by other records")
    }
}

static boolean hasChangedMainEntityId(Document updated, Document preUpdateDoc) {
preUpdateDoc.getThingIdentifiers()[0] &&
updated.getThingIdentifiers()[0] &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import whelk.IdType
import whelk.JsonLd
import whelk.Link
import whelk.exception.CancelUpdateException
import whelk.exception.LinkValidationException
import whelk.exception.MissingMainIriException
import whelk.exception.StaleUpdateException
import whelk.exception.StorageCreateFailedException
Expand Down Expand Up @@ -47,7 +46,9 @@ import java.util.regex.Pattern

import static groovy.transform.TypeCheckingMode.SKIP
import static java.sql.Types.OTHER
import static whelk.JsonLd.Owl.SAME_AS
import static whelk.util.Jackson.mapper
import static whelk.exception.LinkValidationException.OutgoingLinksException

/**
* It is important to not grab more than one connection per request/thread to avoid connection related deadlocks.
Expand Down Expand Up @@ -457,7 +458,7 @@ class PostgreSQLComponent {
private static final String DELETE_USER_DATA =
"DELETE FROM lddb__user_data WHERE id = ?"

private static final String GET_IRI_IS_LINKABLE = """
private static final String GET_IRI_IS_DELETED = """
SELECT lddb.deleted
FROM lddb__identifiers
JOIN lddb ON lddb__identifiers.id = lddb.id WHERE lddb__identifiers.iri = ?
Expand Down Expand Up @@ -775,8 +776,7 @@ class PostgreSQLComponent {
throw new ConflictingHoldException("Already exists a holding record for ${heldBy} and bib: $holdingFor")
}

if (linkFinder != null)
linkFinder.normalizeIdentifiers(doc)
assertNoLinksToDeleted(doc.getExternalRefs())

//FIXME: throw exception on null changedBy
if (changedBy != null) {
Expand Down Expand Up @@ -995,7 +995,12 @@ class PostgreSQLComponent {
}

boolean deleted = doc.getDeleted()


if (!deleted) {
var addedLinks = doc.getExternalRefs() - preUpdateDoc.getExternalRefs()
assertNoLinksToDeleted(addedLinks)
}

if (collection == "hold") {
checkLinkedShelfMarkOwnership(doc, connection)

Expand Down Expand Up @@ -1230,10 +1235,7 @@ class PostgreSQLComponent {
}

getSystemIds(linksByIri.keySet(), connection) { String iri, String systemId, boolean deleted ->
if (deleted && !JsonLd.ALLOW_LINK_TO_DELETED.containsAll(linksByIri[iri]*.relation))
throw new LinkValidationException("Forbidden link(s) to deleted resource ${systemId} found in ${linksByIri[iri]*.relation}")

if (systemId != doc.getShortId()) // Exclude A -> A (self-references)
if (!deleted && systemId != doc.getShortId()) // Exclude A -> A (self-references)
dependencies.addAll(linksByIri[iri].collect { [it.relation, systemId] as String[] })
}

Expand Down Expand Up @@ -2079,19 +2081,18 @@ class PostgreSQLComponent {
}
}

boolean iriIsLinkable(String iri, String path) {
if (path in JsonLd.ALLOW_LINK_TO_DELETED) {
return true
}
boolean isDeleted(String iri) {
withDbConnection {
PreparedStatement preparedStatement = null
ResultSet rs = null
try {
preparedStatement = getMyConnection().prepareStatement(GET_IRI_IS_LINKABLE)
preparedStatement = getMyConnection().prepareStatement(GET_IRI_IS_DELETED)
preparedStatement.setString(1, iri)
rs = preparedStatement.executeQuery()

if (rs.next())
return !rs.getBoolean(1) // not deleted
return rs.getBoolean(1) // deleted
// not in lddb
return false
}
finally {
Expand Down Expand Up @@ -2756,16 +2757,8 @@ class PostgreSQLComponent {
}
}

void remove(String identifier, String changedIn, String changedBy, boolean force=false) {
void remove(String identifier, String changedIn, String changedBy) {
if (versioning) {
if (!force) {
def allow = JsonLd.ALLOW_LINK_TO_DELETED + (jsonld?.cascadingDeleteRelations() ?: Collections.EMPTY_SET)
def referencedBy = followDependers(identifier, allow)
if (!referencedBy.isEmpty()) {
throw new RuntimeException("Deleting depended upon records is not allowed.")
}
}

log.debug("Marking document with ID ${identifier} as deleted.")
try {
storeUpdate(identifier, false, true, changedIn, changedBy,
Expand Down Expand Up @@ -2961,7 +2954,16 @@ class PostgreSQLComponent {
}
}
}


/**
 * Verifies that none of the given links points to a deleted resource.
 *
 * @param links the links to check
 * @throws OutgoingLinksException for the first link (in iteration order) whose
 *         property is not sameAs and whose IRI isDeleted() reports as deleted
 */
private void assertNoLinksToDeleted(Set<Link> links) {
    for (Link link in links) {
        // sameAs is exempt: when two entities are merged, the id of the deleted
        // entity is added to sameAs of the remaining entity.
        if (link.property() == SAME_AS) {
            continue
        }
        if (isDeleted(link.iri)) {
            throw new OutgoingLinksException("Document contains link to deleted resource $link.iri at path $link.relation")
        }
    }
}

class NotificationListener extends Thread {
private static final String NAME = 'pg_listener'
private static final Counter counter = Counter.build()
Expand Down
Loading