diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb index b378fb67..59a901bd 100644 --- a/app/controllers/catalog_controller.rb +++ b/app/controllers/catalog_controller.rb @@ -234,7 +234,7 @@ def retry_on_exception # except in the relevancy case). Add the sort: option to configure a # custom Blacklight url parameter value separate from the Solr sort fields. config.add_sort_field 'relevance', sort: 'score desc, year_available_itsi desc, title_si asc', label: 'relevance' - config.add_sort_field 'year', sort: 'year_available_itsi desc, title_si asc', label: 'year' + config.add_sort_field 'year', sort: 'year_available_itsi desc, pdc_created_at_dtsi desc, title_si asc', label: 'year' config.add_sort_field 'author', sort: 'author_si asc, title_si asc', label: 'author' config.add_sort_field 'title', sort: 'title_si asc, year_available_itsi desc', label: 'title' diff --git a/app/lib/date_normalizer.rb b/app/lib/date_normalizer.rb index 2c55b96f..ace111a2 100644 --- a/app/lib/date_normalizer.rb +++ b/app/lib/date_normalizer.rb @@ -49,11 +49,15 @@ def self.year_from_date(date_string) Date.strptime(date_string).strftime('%Y').to_i elsif date_string.match?(/\d{4}-\d{2}/) Date.strptime(date_string, '%Y-%m').strftime('%Y').to_i + elsif date_string.match?(/^\d{4}/) && date_string.size == 4 + date_string.to_i else - date_string.to_i + time = Time.parse(date_string) + time.year end rescue ArgumentError # bad formatted date + Rails.logger.warn("Error parsing date #{date_string}") nil end end diff --git a/lib/traject/dataspace_research_data_config.rb b/lib/traject/dataspace_research_data_config.rb index f995cfa3..1d8240c9 100644 --- a/lib/traject/dataspace_research_data_config.rb +++ b/lib/traject/dataspace_research_data_config.rb @@ -114,7 +114,7 @@ # single value is used for sorting to_field 'author_si' do |record, accumulator, _c| values = record.xpath("/item/metadata/key[text()='dc.contributor.author']/../value").map(&:text) - 
accumulator.concat [values.uniq.sort.first] + accumulator.concat [values.first] end # all values as strings for faceting @@ -234,6 +234,17 @@ accumulator.concat DateNormalizer.format_array_for_display(issue_dates) end +# Add a singular field for sorting +to_field "issue_date_si" do |record, accumulator, _context| + issue_dates = record.xpath("/item/metadata/key[text()='dc.date.issued']/../value").map(&:text) + accumulator.concat DateNormalizer.format_array_for_display(issue_dates) +end + +to_field "issue_date_si" do |record, accumulator, _context| + issue_dates = record.xpath("/item/metadata/key[text()='dcterms.issued']/../value").map(&:text) + accumulator.concat DateNormalizer.format_array_for_display(issue_dates) +end + # Date in yyyy-mm-dd format so we can sort by it to_field "issue_date_strict_ssi" do |record, accumulator, _context| dates = record.xpath("/item/metadata/key[text()='dc.date.issued']/../value").map(&:text) diff --git a/lib/traject/pdc_describe_indexing_config.rb b/lib/traject/pdc_describe_indexing_config.rb index 5d410921..4502b5a3 100644 --- a/lib/traject/pdc_describe_indexing_config.rb +++ b/lib/traject/pdc_describe_indexing_config.rb @@ -89,7 +89,7 @@ # single value is used for sorting to_field 'author_si' do |record, accumulator, _c| author_names = record.xpath("/hash/resource/creators/creator/value").map(&:text) - accumulator.concat [author_names.uniq.sort.first] + accumulator.concat [author_names.first] end # all values as strings for faceting @@ -144,8 +144,27 @@ to_field 'issue_date_ssim', extract_xpath("/hash/resource/publication-year") +to_field 'issue_date_si', extract_xpath("/hash/resource/publication-year") + to_field 'pdc_created_at_dtsi', extract_xpath('/hash/created-at') +to_field "year_available_itsi" do |record, accumulator, _context| + year_value = record.xpath("/hash/resource/publication-year/text()") + if year_value.present? 
+ accumulator.concat [year_value] + else + date_value = record.xpath("/hash/created-at/text()").to_s + date = begin + DateTime.parse(date_value) + rescue + nil + end + if date + accumulator.concat [date.year] + end + end +end + to_field 'pdc_updated_at_dtsi', extract_xpath('/hash/updated-at') # ================== diff --git a/spec/fixtures/files/pdc_describe_data/89.json b/spec/fixtures/files/pdc_describe_data/89.json index 9175ba6e..899a92f8 100644 --- a/spec/fixtures/files/pdc_describe_data/89.json +++ b/spec/fixtures/files/pdc_describe_data/89.json @@ -73,7 +73,7 @@ "resource_type": "Dataset", "resource_type_general": "Dataset", "publisher": "Princeton Plasma Physics Laboratory, Princeton University", - "publication_year": "2022", + "publication_year": "2023", "ark": "ark:/88435/dsp01wh246w38h", "doi": "10.34770/bm4s-t361", "rights_many": [ @@ -150,13 +150,7 @@ "errors": [] } ], - "keywords": [ - "HHFW", - "3D RF modeling", - "Petra-M", - "fast wave", - "NSTX-U" - ], + "keywords": ["HHFW", "3D RF modeling", "Petra-M", "fast wave", "NSTX-U"], "contributors": [], "organizational_contributors": [], "funders": [ @@ -167,15 +161,12 @@ "award_uri": "" } ], - "domains": [ - "Natural Sciences" - ], - "communities": [ - "Princeton Plasma Physics Laboratory" - ], + "domains": ["Natural Sciences"], + "communities": ["Princeton Plasma Physics Laboratory"], "subcommunities": [ "NSTX-U", - "Spherical Torus" + "Spherical Torus", + "Tokamak Experimental Sciences" ], "migrated": true }, @@ -321,9 +312,12 @@ ], "group": { "title": "Princeton Plasma Physics Lab (PPPL)", - "description": null, + "description": "", "code": "PPPL", "created_at": "2022-04-28T16:30:00.195-04:00", - "updated_at": "2023-05-18T14:20:45.272-04:00" - } + "updated_at": "2024-02-16T09:06:20.108-05:00" + }, + "embargo_date": null, + "created_at": "2023-08-18T13:47:14Z", + "updated_at": "2024-02-23T11:05:11Z" } \ No newline at end of file diff --git a/spec/fixtures/files/sowing_the_seeds.json 
b/spec/fixtures/files/sowing_the_seeds.json index 35937a9e..0f4fee3f 100644 --- a/spec/fixtures/files/sowing_the_seeds.json +++ b/spec/fixtures/files/sowing_the_seeds.json @@ -6,9 +6,18 @@ "title_type": null } ], - "description": "In 2017, seven members of the Archive-It Mid-Atlantic Users Group (AITMA) conducted a study of 14 subjects representative of their stakeholder populations to assess the usability of Archive-It, a web archiving subscription service of the Internet Archive. While Archive-It is the most widely-used tool for web archiving, little is known about how users interact with the service.This study intended to teach us what users expect from web archives, which exist as another form of archival material. End-user subjects executed four search tasks using the public Archive-It interface and the Wayback Machine to access archived information on websites from the facilitators’ own harvested collections and provide feedback about their experiences. The tasks were designed to have straightforward pass or fail outcomes,\r\n and the facilitators took notes on the subjects’ behavior and commentary during the sessions.Overall, participants reported mildly positive impressions of Archive-It public user interface based on their session. The study identified several key areas of improvement for the Archive-It service pertaining to metadata options, terminology display, indexing of dates, and the site’s search box.\r\n-\r\nDownload the README.txt for a detailed description of this dataset's content.", + "description": "In 2017, seven members of the Archive-It Mid-Atlantic Users Group (AITMA) conducted a study of 14 subjects representative of their stakeholder populations to assess the usability of Archive-It, a web archiving subscription service of the Internet Archive. While Archive-It is the most widely-used tool for web archiving, little is known about how users interact with the service. 
This study intended to teach us what users expect from web archives, which exist as another form of archival material. End-user subjects executed four search tasks using the public Archive-It interface and the Wayback Machine to access archived information on websites from the facilitators' own harvested collections and provide feedback about their experiences. The tasks were designed to have straightforward pass or fail outcomes,\r\nand the facilitators took notes on the subjects' behavior and commentary during the sessions. Overall, participants reported mildly positive impressions of Archive-It public user interface based on their session. The study identified several key areas of improvement for the Archive-It service pertaining to metadata options, terminology display, indexing of dates, and the site's search box.\r\n\r\nDownload the README.txt for a detailed description of this dataset's content.", "collection_tags": [], "creators": [ + { + "value": "Abrams, Samantha", + "name_type": "Personal", + "given_name": "Samantha", + "family_name": "Abrams", + "identifier": null, + "affiliations": [], + "sequence": 0 + }, { "value": "Antracoli, Alexis", "name_type": "Personal", @@ -62,40 +71,41 @@ "identifier": null, "affiliations": [], "sequence": 6 - }, - { - "value": "Abrams, Samantha", - "name_type": "Personal", - "given_name": "Samantha", - "family_name": "Abrams", - "identifier": null, - "affiliations": [], - "sequence": 7 } ], "resource_type": "Dataset", - "resource_type_general": "", + "resource_type_general": "Dataset", "publisher": "Princeton University", "publication_year": "2023", - "ark": null, - "doi": "10.34770/doc-1", - "rights": { - "identifier": "GPLv3", - "uri": "https://www.gnu.org/licenses/gpl-3.0.en.html", - "name": "GNU General Public License" - }, + "ark": "ark:/88435/dsp01d791sj97j", + "doi": "10.34770/00yp-2w12", + "rights_many": [ + { + "identifier": "CC BY", + "uri": "https://creativecommons.org/licenses/by/4.0/", + "name": "Creative 
Commons Attribution 4.0 International" + } + ], "version_number": "1", "related_objects": [], "keywords": [], "contributors": [], - "funders": [] + "organizational_contributors": [], + "funders": [], + "domains": [], + "communities": [], + "subcommunities": [], + "migrated": true }, "files": [], "group": { - "title": "Research Data", - "description": null, + "title": "Princeton Research Data Service (PRDS)", + "description": "", "code": "RD", - "created_at": "2023-01-05T11:26:07.005-05:00", - "updated_at": "2023-01-05T11:26:07.005-05:00" - } + "created_at": "2022-04-28T16:30:00.190-04:00", + "updated_at": "2024-01-23T11:29:40.724-05:00" + }, + "embargo_date": null, + "created_at": "2023-07-11T11:06:10Z", + "updated_at": "2023-09-13T08:23:50Z" } diff --git a/spec/lib/date_normalizer_spec.rb b/spec/lib/date_normalizer_spec.rb index 09a8f473..56c4d754 100644 --- a/spec/lib/date_normalizer_spec.rb +++ b/spec/lib/date_normalizer_spec.rb @@ -4,6 +4,7 @@ let(:years) { ['2015'] } let(:months_and_years) { ['2015-08'] } let(:timestamps) { ['2015-08-18T18:14:22Z'] } + let(:month_year_name) { ['August 2020'] } describe "#format_array_for_display" do it "formats four digit years" do @@ -27,6 +28,7 @@ expect(described_class.years_from_dates(timestamps)).to eq [2015] expect(described_class.years_from_dates(months_and_years)).to eq [2015] expect(described_class.years_from_dates(years)).to eq [2015] + expect(described_class.years_from_dates(month_year_name)).to eq [2020] end it "handles bad dates" do diff --git a/spec/lib/describe_indexer_spec.rb b/spec/lib/describe_indexer_spec.rb index bd864b39..ef0cbaa5 100644 --- a/spec/lib/describe_indexer_spec.rb +++ b/spec/lib/describe_indexer_spec.rb @@ -189,6 +189,35 @@ expect(response["response"]["numFound"]).to eq 2 end + it "can sort by year_available_itsi" do + Rails.configuration.pdc_discovery.index_pdc_describe = true + indexer.index + response = Blacklight.default_index.connection.get 'select', params: { q: '*:*', sort: 
'year_available_itsi desc' } + expect(response["response"]["numFound"]).to eq 2 + expect(response["response"]['docs'].first['pdc_created_at_dtsi']).to eq("2023-07-11T11:06:10Z") + expect(response["response"]['docs'].last['pdc_created_at_dtsi']).to eq("2021-12-31T19:00:00Z") + end + + context "works with multiple creators" do + let(:pppl1) { File.read(File.join(fixture_path, 'files', 'pppl1.json')) } + let(:pppl2) { File.read(File.join(fixture_path, 'files', 'pppl2.json')) } + before do + stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json") + .to_return(status: 200, body: pppl1, headers: {}) + stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json") + .to_return(status: 200, body: pppl2, headers: {}) + end + + it "can sort by the first author" do + Rails.configuration.pdc_discovery.index_pdc_describe = true + indexer.index + response = Blacklight.default_index.connection.get 'select', params: { q: '*:*', sort: 'author_si desc' } + expect(response["response"]["numFound"]).to eq 2 + expect(response["response"]['docs'].first['author_tesim'].first).to eq("Wang, Yin") + expect(response["response"]['docs'].last['author_tesim'].first).to eq("Schwartz, Jacob A.") + end + end + context "when there are items which are under active embargo" do let(:item_file_fixture) { file_fixture("pdc_describe_active_embargo.json") } let(:embargo_resource) { item_file_fixture.read }