Skip to content

Commit

Permalink
add reports to detect existence of cocina property in dros and collec…
Browse files Browse the repository at this point in the history
…tions
  • Loading branch information
ndushay committed Sep 26, 2022
1 parent ec15439 commit eb7d041
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
44 changes: 44 additions & 0 deletions app/reports/property_existence_collections.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# frozen_string_literal: true

# Report collection objects with occurrences of a property.

# Invoke via:
# bin/rails r -e production "PropertyExistenceCollections.report"
class PropertyExistenceCollections
  # NOTE: Prefer strict JSON querying over lax when using the `.**` operator, per
  # https://www.postgresql.org/docs/14/functions-json.html#STRICT-AND-LAX-MODES
  #
  # > The .** accessor can lead to surprising results when using the lax mode.
  # > ... This happens because the .** accessor selects both the segments array
  # > and each of its elements, while the .HR accessor automatically unwraps
  # > arrays when using the lax mode. To avoid surprising results, we recommend
  # > using the .** accessor only in the strict mode.
  JSONB_PATH = 'strict $.**.groupedValue ? (@.size() > 0)' # when property is array
  # JSONB_PATH = 'strict $.**.contributor.type' # when property is a string - maybe keep size check to avoid empty values

  # Selects the druid and symphony catkey of every collection whose description
  # matches JSONB_PATH.
  #
  # The catkey uses jsonb_path_query_first rather than jsonb_path_query: the
  # latter is set-returning, and a set-returning function in the select list
  # that yields no rows removes the whole result row, which would silently drop
  # collections without a symphony catalog link. jsonb_path_query_first yields
  # NULL instead, so such collections are reported with an empty catkey.
  SQL = <<~SQL.squish.freeze
    SELECT external_identifier as collection_druid,
           jsonb_path_query_first(identification, '$.catalogLinks[*] ? (@.catalog == "symphony").catalogRecordId') ->> 0 as catkey
           FROM "collections" WHERE
           jsonb_path_exists(collections.description, '#{JSONB_PATH}')
  SQL

  # Prints the report to stdout as CSV: a header line, then one line per
  # matching collection.
  def self.report
    puts "collection_druid,catkey,collection_name\n"
    rows(SQL).each { |row| puts row }
  end

  # Runs the given query and formats each result row as a CSV line.
  #
  # @param sql_query [String] SQL whose result rows expose `collection_druid`
  #   and `catkey` columns
  # @return [Array<String>] lines of the form `druid,catkey,"collection name"`
  def self.rows(sql_query)
    sql_result_rows = ActiveRecord::Base.connection.execute(sql_query).to_a

    sql_result_rows.map do |row|
      collection_druid = row['collection_druid']
      collection_name = Collection.find_by(external_identifier: collection_druid)&.label

      [
        collection_druid,
        row['catkey'],
        quoted(collection_name)
      ].join(',')
    end
  end

  # Wraps a value in double quotes for CSV output, doubling any embedded double
  # quotes so that names containing `"` do not break the column layout.
  # A nil value is rendered as an empty quoted field.
  #
  # @param value [String, nil]
  # @return [String]
  def self.quoted(value)
    %("#{value.to_s.gsub('"', '""')}")
  end
  private_class_method :quoted
end
46 changes: 46 additions & 0 deletions app/reports/property_existence_dros.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# frozen_string_literal: true

# Report dro objects with occurrences of a property.

# Invoke via:
# bin/rails r -e production "PropertyExistenceDros.report"
class PropertyExistenceDros
  # NOTE: Prefer strict JSON querying over lax when using the `.**` operator, per
  # https://www.postgresql.org/docs/14/functions-json.html#STRICT-AND-LAX-MODES
  #
  # > The .** accessor can lead to surprising results when using the lax mode.
  # > ... This happens because the .** accessor selects both the segments array
  # > and each of its elements, while the .HR accessor automatically unwraps
  # > arrays when using the lax mode. To avoid surprising results, we recommend
  # > using the .** accessor only in the strict mode.
  JSONB_PATH = 'strict $.**.contributor.parallelContributor ? (@.size() > 0)' # when property is array
  # JSONB_PATH = 'strict $.**.contributor.type' # when property is a string - maybe keep size check to avoid empty values

  # Selects the druid, symphony catkey and first collection druid of every dro
  # whose description matches JSONB_PATH.
  #
  # jsonb_path_query_first is used rather than the set-returning
  # jsonb_path_query, so a missing catkey or collection becomes NULL instead of
  # affecting how many rows are returned for the item.
  #
  # The collection column is aliased `collection_druid` because that is the key
  # `rows` reads and the name used in the report header.
  SQL = <<~SQL.squish.freeze
    SELECT external_identifier as item_druid,
           jsonb_path_query_first(identification, '$.catalogLinks[*] ? (@.catalog == "symphony").catalogRecordId') ->> 0 as catkey,
           jsonb_path_query_first(structural, '$.isMemberOf') ->> 0 as collection_druid
           FROM "dros" WHERE
           jsonb_path_exists(dros.description, '#{JSONB_PATH}')
  SQL

  # Prints the report to stdout as CSV: a header line, then one line per
  # matching item.
  def self.report
    puts "item_druid,catkey,collection_druid,collection_name\n"
    rows(SQL).each { |row| puts row }
  end

  # Runs the given query and formats each result row as a CSV line.
  #
  # @param sql_query [String] SQL whose result rows expose `item_druid`,
  #   `catkey` and `collection_druid` columns
  # @return [Array<String>] lines of the form
  #   `item_druid,catkey,collection_druid,"collection name"`
  def self.rows(sql_query)
    sql_result_rows = ActiveRecord::Base.connection.execute(sql_query).to_a

    # Many items can belong to the same collection, so each collection label is
    # looked up once and reused. Items without a collection skip the lookup.
    collection_names = Hash.new do |cache, druid|
      cache[druid] = druid && Collection.find_by(external_identifier: druid)&.label
    end

    sql_result_rows.map do |row|
      collection_druid = row['collection_druid']

      [
        row['item_druid'],
        row['catkey'],
        collection_druid,
        quoted(collection_names[collection_druid])
      ].join(',')
    end
  end

  # Wraps a value in double quotes for CSV output, doubling any embedded double
  # quotes so that names containing `"` do not break the column layout.
  # A nil value is rendered as an empty quoted field.
  #
  # @param value [String, nil]
  # @return [String]
  def self.quoted(value)
    %("#{value.to_s.gsub('"', '""')}")
  end
  private_class_method :quoted
end

0 comments on commit eb7d041

Please sign in to comment.