Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DFG-to-FOS mappings utility function #147

Merged
merged 3 commits into from
Oct 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/bolognese/doi_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

module Bolognese
module DoiUtils
# Include DoiUtils into its own singleton class so that the module's
# instance methods can also be called directly on the module itself
# (e.g. DoiUtils::normalize_doi), without mixing it into another object first.
class << self
include DoiUtils
end

def validate_doi(doi)
doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
# remove non-printing whitespace and downcase
Expand Down
59 changes: 50 additions & 9 deletions lib/bolognese/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

module Bolognese
module Utils
# Include Utils into its own singleton class so that the module's
# instance methods can also be called directly on the module itself
# (e.g. Bolognese::Utils.dfg_ids_to_fos), without mixing it into another object first.
class << self
include Utils
end

NORMALIZED_LICENSES = {
"https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
"https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
Expand Down Expand Up @@ -484,6 +488,28 @@ module Utils
":etal" => "too numerous to list (et alia)"
}

# Paths of the bundled JSON lookup files, relative to the directory returned
# by resources_dir_path and keyed by the symbol that resource_json accepts.
# Frozen so the lookup table cannot be modified by accident at runtime.
RESOURCE_PATHS = {
  spdx: 'spdx/licenses.json',
  fos: 'oecd/fos-mappings.json',
  for: 'oecd/for-mappings.json',
  dfg: 'oecd/dfg-mappings.json'
}.freeze

# Absolute path of the bundled "resources" directory, resolved three levels
# up from this source file.
#
# @return [String] the directory path, always ending in "/"
def resources_dir_path
  resources_root = File.expand_path('../../../resources', __FILE__)
  resources_root + '/'
end

# Reads a bundled resource file and returns its contents.
#
# @param extra_path [String] path of the file relative to resources_dir_path
# @return [String] the file contents, as returned by File.read
def resource_file(extra_path)
  full_path = resources_dir_path + extra_path
  File.read(full_path)
end

# Loads and parses one of the bundled JSON resources.
#
# @param resource_symbol [Symbol] a key of RESOURCE_PATHS
# @return [Object, nil] the parsed JSON document, or nil when
#   resource_symbol is not a key of RESOURCE_PATHS
def resource_json(resource_symbol)
  # Hash#key? is a direct lookup; no intermediate array of keys is built.
  return unless RESOURCE_PATHS.key?(resource_symbol)

  JSON.load(resource_file(RESOURCE_PATHS[resource_symbol]))
end


def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
if id.present?
find_from_format_by_id(id)
Expand Down Expand Up @@ -615,7 +641,7 @@ def normalize_id(id, options={})
return nil unless id.present?

# check for valid DOI
doi = normalize_doi(id, options)
doi = DoiUtils::normalize_doi(id, options)
return doi if doi.present?

# check for valid HTTP uri
Expand Down Expand Up @@ -674,8 +700,8 @@ def normalize_ror(ror)
def normalize_ids(ids: nil, relation_type: nil)
Array.wrap(ids).select { |idx| idx["@id"].present? }.map do |idx|
id = normalize_id(idx["@id"])
related_identifier_type = doi_from_url(id).present? ? "DOI" : "URL"
id = doi_from_url(id) || id
related_identifier_type = DoiUtils::doi_from_url(id).present? ? "DOI" : "URL"
id = DoiUtils::doi_from_url(id) || id

{ "relatedIdentifier" => id,
"relationType" => relation_type,
Expand Down Expand Up @@ -1233,7 +1259,7 @@ def jsonlint(json)
end

def name_to_spdx(name)
spdx = JSON.load(File.read(File.expand_path('../../../resources/spdx/licenses.json', __FILE__))).fetch("licenses")
spdx = resource_json(:spdx).fetch("licenses")
license = spdx.find { |l| l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == normalize_cc_url(name) }

if license
Expand All @@ -1249,7 +1275,7 @@ def name_to_spdx(name)
end

def hsh_to_spdx(hsh)
spdx = JSON.load(File.read(File.expand_path('../../../resources/spdx/licenses.json', __FILE__))).fetch("licenses")
spdx = resource_json(:spdx).fetch("licenses")
license = spdx.find { |l| l["licenseId"].casecmp?(hsh["rightsIdentifier"]) || l["seeAlso"].first == normalize_cc_url(hsh["rightsURI"]) || l["name"] == hsh["rights"] || l["seeAlso"].first == normalize_cc_url(hsh["rights"]) }

if license
Expand All @@ -1273,7 +1299,7 @@ def hsh_to_spdx(hsh)

def name_to_fos(name)
# first find subject in Fields of Science (OECD)
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
fos = resource_json(:fos).fetch("fosFields")

subject = fos.find { |l| l["fosLabel"] == name || "FOS: " + l["fosLabel"] == name }

Expand All @@ -1289,7 +1315,7 @@ def name_to_fos(name)

# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
# and map to Fields of Science. Add an extra entry for the latter
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
fores = resource_json(:for)
for_fields = fores.fetch("forFields")
for_disciplines = fores.fetch("forDisciplines")

Expand All @@ -1311,7 +1337,7 @@ def name_to_fos(name)

def hsh_to_fos(hsh)
# first find subject in Fields of Science (OECD)
fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
fos = resource_json(:fos).fetch("fosFields")
subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]}

if subject
Expand All @@ -1330,7 +1356,7 @@ def hsh_to_fos(hsh)

# if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
# and map to Fields of Science. Add an extra entry for the latter
fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
fores = resource_json(:for)
for_fields = fores.fetch("forFields")
for_disciplines = fores.fetch("forDisciplines")

Expand Down Expand Up @@ -1369,5 +1395,20 @@ def hsh_to_fos(hsh)
"lang" => hsh["lang"] }.compact]
end
end

# Maps one or more DFG ids to Fields of Science and Technology (FOS) subjects,
# using the "dfgFields" list of the :dfg resource.
#
# @param dfg_ids [Object, Array] a single DFG id or a list of ids
#   (the specs pass strings such as "20108")
# @return [Array<Hash>] one subject hash per "dfgFields" entry whose "dfgId"
#   is among the given ids, in the order the entries appear in the resource;
#   empty when nothing matches
def dfg_ids_to_fos(dfg_ids)
  mappings = resource_json(:dfg).fetch("dfgFields")
  wanted_ids = Array.wrap(dfg_ids)

  mappings
    .select { |mapping| wanted_ids.include?(mapping["dfgId"]) }
    .map do |mapping|
      {
        "classificationCode" => mapping["fosId"],
        "subject" => mapping["fosLabel"],
        "subjectScheme" => "Fields of Science and Technology (FOS)",
        "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
      }
    end
end
end
end
57 changes: 53 additions & 4 deletions spec/utils_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

require 'spec_helper'

describe Bolognese::Metadata, vcr: true do
let(:input) { "https://doi.org/10.1101/097196" }

subject { Bolognese::Metadata.new(input: input, from: "crossref") }
describe Bolognese::Utils do
subject { Bolognese::Utils }

context "validate url" do
it "DOI" do
Expand Down Expand Up @@ -614,4 +612,55 @@
end

end

context "DFG mappings" do
  # Builds the FOS subject hash expected from dfg_ids_to_fos for one
  # classification code and label.
  def fos_subject(code, label)
    {
      "classificationCode" => code,
      "subject" => label,
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
  end

  it "dfg_id_to_fos match" do
    # a single DFG id can map to more than one FOS subject
    result = subject.dfg_ids_to_fos("20108")

    expect(result).to eq([
      fos_subject("1.6", "Biological sciences"),
      fos_subject("3.1", "Basic medicine")
    ])
  end

  it "dfg_id_to_fos no match" do
    result = subject.dfg_ids_to_fos("000")

    expect(result).to eq([])
  end

  it "dfg_id_to_fos match list" do
    result = subject.dfg_ids_to_fos(["101", "20108"])

    expect(result).to eq([
      fos_subject("6.1", "History and archaeology"),
      fos_subject("6.2", "Languages and literature"),
      fos_subject("1.6", "Biological sciences"),
      fos_subject("3.1", "Basic medicine")
    ])
  end
end
end