diff --git a/thoth-app/src/models/work/mod.rs b/thoth-app/src/models/work/mod.rs index d8df5fcf..43fe3439 100644 --- a/thoth-app/src/models/work/mod.rs +++ b/thoth-app/src/models/work/mod.rs @@ -364,25 +364,25 @@ impl DisplayWork for WorkWithRelations { href={self.onix_projectmuse_endpoint()} class="dropdown-item" > - {"ONIX (Project MUSE)"} + {"ONIX 3.0 (Project MUSE)"} - {"ONIX (OAPEN)"} + {"ONIX 3.0 (OAPEN/DOAB)"} - {"ONIX (JSTOR)"} + {"ONIX 3.0 (JSTOR)"} - {"ONIX (EBSCO Host)"} + {"ONIX 2.1 (EBSCO Host)"} for Work { fn xml_element(&self, w: &mut EventWriter) -> ThothResult<()> { - let work_id = format!("urn:uuid:{}", self.work_id.to_string()); - let (main_isbn, isbns) = get_publications_data(&self.publications); + // Project MUSE can only ingest works which have at least one BIC or BISAC subject code + if !self + .subjects + .iter() + .any(|s| s.subject_type.eq(&SubjectType::BISAC) || s.subject_type.eq(&SubjectType::BIC)) + { + Err(ThothError::IncompleteMetadataRecord( + "onix_3.0::project_muse".to_string(), + "No BIC or BISAC subject code".to_string(), + )) + } // We can only generate the document if there's a PDF - if let Some(pdf_url) = self + else if let Some(pdf_url) = self .publications .iter() .find(|p| p.publication_type.eq(&PublicationType::PDF)) .and_then(|p| p.publication_url.as_ref()) { + let work_id = format!("urn:uuid:{}", self.work_id.to_string()); + let (main_isbn, isbns) = get_publications_data(&self.publications); write_element_block("Product", w, |w| { write_element_block("RecordReference", w, |w| { w.write(XmlEvent::Characters(&work_id)) @@ -194,54 +205,71 @@ impl XmlElementBlock for Work { })?; } for subject in &self.subjects { - write_element_block("Subject", w, |w| { - XmlElement::::xml_element(&subject.subject_type, w)?; - write_element_block("SubjectCode", w, |w| { - w.write(XmlEvent::Characters(&subject.subject_code)) + // Project MUSE can't process records containing keywords + if subject.subject_type != SubjectType::KEYWORD { + write_element_block("Subject", w, |w| { + XmlElement::::xml_element( + &subject.subject_type, + w, + )?; + write_element_block("SubjectCode", w, |w| { + w.write(XmlEvent::Characters(&subject.subject_code)) + .map_err(|e| e.into()) + }) + })?; + } + } + Ok(()) + })?; + write_element_block("CollateralDetail", w, |w| { + if let Some(labstract) = &self.long_abstract { + write_element_block("TextContent", w, |w| { + let mut lang_fmt: HashMap<&str, &str> = HashMap::new(); + lang_fmt.insert("language", "eng"); + // 03 Description ("30 Abstract" not implemented in OAPEN) + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_full_element_block("Text", None, Some(lang_fmt), w, |w| { + w.write(XmlEvent::Characters(labstract)) .map_err(|e| e.into()) }) })?; } - Ok(()) - })?; - if self.long_abstract.is_some() || self.toc.is_some() { - write_element_block("CollateralDetail", w, |w| { - if let Some(labstract) = &self.long_abstract { - write_element_block("TextContent", w, |w| { - let mut lang_fmt: HashMap<&str, &str> = HashMap::new(); - lang_fmt.insert("language", "eng"); - // 03 Description ("30 Abstract" not implemented in OAPEN) - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("03")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_full_element_block("Text", None, Some(lang_fmt), w, |w| { - w.write(XmlEvent::Characters(labstract)) - .map_err(|e| e.into()) - }) + if let Some(toc) = &self.toc { + write_element_block("TextContent", w, |w| { + // 04 Table of contents + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) })?; - } - if let Some(toc) = &self.toc { - write_element_block("TextContent", w, |w| { - // 04 Table of contents - write_element_block("TextType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) - })?; - // 00 Unrestricted - write_element_block("ContentAudience", w, |w| { - w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) - })?; - write_element_block("Text", w, |w| { - w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) - }) + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) })?; - } - Ok(()) - })?; - } + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters(toc)).map_err(|e| e.into()) + }) + })?; + } + write_element_block("TextContent", w, |w| { + // 20 Open access statement + write_element_block("TextType", w, |w| { + w.write(XmlEvent::Characters("20")).map_err(|e| e.into()) + })?; + // 00 Unrestricted + write_element_block("ContentAudience", w, |w| { + w.write(XmlEvent::Characters("00")).map_err(|e| e.into()) + })?; + write_element_block("Text", w, |w| { + w.write(XmlEvent::Characters("Open Access")) + .map_err(|e| e.into()) + }) + }) + })?; write_element_block("PublishingDetail", w, |w| { write_element_block("Imprint", w, |w| { write_element_block("ImprintName", w, |w| { @@ -268,15 +296,15 @@ impl XmlElementBlock for Work { if let Some(date) = self.publication_date { write_element_block("PublishingDate", w, |w| { let mut date_fmt: HashMap<&str, &str> = HashMap::new(); - date_fmt.insert("dateformat", "01"); // 01 YYYYMM + date_fmt.insert("dateformat", "00"); // 00 YYYYMMDD write_element_block("PublishingDateRole", w, |w| { - // 19 Publication date of print counterpart - w.write(XmlEvent::Characters("19")).map_err(|e| e.into()) + // 01 Publication date + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) })?; - // dateformat="01" YYYYMM + // dateformat="00" YYYYMMDD write_full_element_block("Date", None, Some(date_fmt), w, |w| { - w.write(XmlEvent::Characters(&date.format("%Y%m").to_string())) + w.write(XmlEvent::Characters(&date.format("%Y%m%d").to_string())) .map_err(|e| e.into()) }) })?; @@ -355,9 +383,9 @@ impl XmlElementBlock for Work { write_element_block("ProductAvailability", w, |w| { w.write(XmlEvent::Characters("99")).map_err(|e| e.into()) })?; - // 04 Contact supplier + // 01 Free of charge write_element_block("UnpricedItemType", w, |w| { - w.write(XmlEvent::Characters("04")).map_err(|e| e.into()) + w.write(XmlEvent::Characters("01")).map_err(|e| e.into()) }) })?; } @@ -425,11 +453,11 @@ impl XmlElement for SubjectType { match self { SubjectType::BIC => "12", SubjectType::BISAC => "10", - SubjectType::KEYWORD => "20", SubjectType::LCC => "04", SubjectType::THEMA => "93", SubjectType::CUSTOM => "B2", - SubjectType::Other(_) => unreachable!(), + // Keywords are not output for Project MUSE + SubjectType::KEYWORD | SubjectType::Other(_) => unreachable!(), } } } @@ -812,8 +840,6 @@ mod tests { assert!(output.contains(r#" JA85"#)); assert!(output.contains(r#" 93"#)); assert!(output.contains(r#" JWA"#)); - assert!(output.contains(r#" 20"#)); - assert!(output.contains(r#" keyword1"#)); assert!(output.contains(r#" B2"#)); assert!(output.contains(r#" custom1"#)); assert!(output.contains(r#" "#)); @@ -824,6 +850,9 @@ mod tests { assert!(output.contains(r#" "#)); assert!(output.contains(r#" 04"#)); assert!(output.contains(r#" 1. Chapter 1"#)); + assert!(output.contains(r#" "#)); + assert!(output.contains(r#" 20"#)); + assert!(output.contains(r#" Open Access"#)); assert!(output.contains(r#" "#)); assert!(output.contains(r#" "#)); assert!(output.contains(r#" OA Editions Imprint"#)); @@ -833,8 +862,8 @@ mod tests { assert!(output.contains(r#" León, Spain"#)); assert!(output.contains(r#" 04"#)); assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 19"#)); - assert!(output.contains(r#" 199912"#)); + assert!(output.contains(r#" 01"#)); + assert!(output.contains(r#" 19991231"#)); assert!(output.contains(r#" "#)); assert!(output.contains(r#" 06"#)); assert!(output.contains(r#" "#)); @@ -852,7 +881,7 @@ mod tests { )); assert!(output.contains(r#" https://www.book.com"#)); assert!(output.contains(r#" 99"#)); - assert!(output.contains(r#" 04"#)); + assert!(output.contains(r#" 01"#)); assert!(output.contains(r#" 09"#)); assert!(output.contains(r#" OA Editions"#)); assert!(output.contains(r#" 29"#)); @@ -860,6 +889,8 @@ mod tests { assert!(output.contains(r#" https://www.book.com/pdf"#)); // Test that OAPEN-only blocks are not output in Project MUSE format + assert!(!output.contains(r#" 20"#)); + assert!(!output.contains(r#" keyword1"#)); assert!(!output.contains(r#" "#)); assert!(!output.contains(r#" 01"#)); assert!(!output.contains(r#" 06"#)); @@ -886,10 +917,11 @@ mod tests { test_work.subtitle = None; test_work.page_count = None; test_work.long_abstract = None; + test_work.toc = None; test_work.place = None; test_work.publication_date = None; test_work.landing_page = None; - test_work.subjects.clear(); + test_work.subjects.drain(1..); let output = generate_test_output(&test_work); // No DOI supplied assert!(!output.contains(r#" 06"#)); @@ -911,68 +943,67 @@ mod tests { assert!(!output.contains(r#" 00"#)); assert!(!output.contains(r#" 334"#)); assert!(!output.contains(r#" 03"#)); - // No long abstract supplied: CollateralDetail block only contains TOC + // No long abstract supplied + assert!(!output.contains(r#" 03"#)); + assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); + // No TOC supplied + assert!(!output.contains(r#" 04"#)); + assert!(!output.contains(r#" 1. Chapter 1"#)); + // CollateralDetail block is still present as it always contains Open Access statement assert!(output.contains(r#" "#)); assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 04"#)); assert!(output.contains(r#" 00"#)); - assert!(output.contains(r#" 1. Chapter 1"#)); - assert!(!output.contains(r#" 03"#)); - assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); // No place supplied assert!(!output.contains(r#" León, Spain"#)); // No publication date supplied assert!(!output.contains(r#" "#)); - assert!(!output.contains(r#" 19"#)); - assert!(!output.contains(r#" 199912"#)); + assert!(!output.contains(r#" 01"#)); + assert!(!output.contains(r#" 19991231"#)); // No landing page supplied: only one SupplyDetail block, linking to PDF download assert!(!output.contains(r#" 01"#)); assert!(!output.contains( r#" Publisher's website: web shop"# )); assert!(!output.contains(r#" https://www.book.com"#)); - // No subjects supplied - assert!(!output.contains(r#" "#)); - assert!(!output.contains(r#" 12"#)); - assert!(!output.contains(r#" AAB"#)); + // All subjects removed except BIC assert!(!output.contains(r#" 10"#)); assert!(!output.contains(r#" AAA000000"#)); assert!(!output.contains(r#" 04"#)); assert!(!output.contains(r#" JA85"#)); assert!(!output.contains(r#" 93"#)); assert!(!output.contains(r#" JWA"#)); - assert!(!output.contains(r#" 20"#)); - assert!(!output.contains(r#" keyword1"#)); assert!(!output.contains(r#" B2"#)); assert!(!output.contains(r#" custom1"#)); - // Replace long abstract but remove TOC - // Result: CollateralDetail block still present, but now only contains long abstract - test_work.long_abstract = Some("Lorem ipsum dolor sit amet".to_string()); - test_work.toc = None; - let output = generate_test_output(&test_work); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" "#)); - assert!(output.contains(r#" 03"#)); - assert!(output.contains(r#" 00"#)); - assert!(output.contains(r#" Lorem ipsum dolor sit amet"#)); - assert!(!output.contains(r#" 04"#)); - assert!(!output.contains(r#" 1. Chapter 1"#)); - - // Remove both TOC and long abstract - // Result: No CollateralDetail block present at all - test_work.long_abstract = None; - let output = generate_test_output(&test_work); - assert!(!output.contains(r#" "#)); - assert!(!output.contains(r#" "#)); - assert!(!output.contains(r#" 03"#)); - assert!(!output.contains(r#" 00"#)); - assert!(!output.contains(r#" Lorem ipsum dolor sit amet"#)); - assert!(!output.contains(r#" 04"#)); - assert!(!output.contains(r#" 1. Chapter 1"#)); + // Remove the only remaining (BIC) subject + // Result: error (can't generate Project MUSE ONIX without either a BIC or BISAC subject) + test_work.subjects.clear(); + // Can't use helper function for this as it assumes Ok rather than Err + let mut buffer = Vec::new(); + let mut writer = xml::writer::EmitterConfig::new() + .perform_indent(true) + .create_writer(&mut buffer); + let wrapped_output = + XmlElementBlock::::xml_element(&test_work, &mut writer) + .map(|_| buffer) + .and_then(|onix| { + String::from_utf8(onix) + .map_err(|_| ThothError::InternalError("Could not parse XML".to_string())) + }); + assert!(wrapped_output.is_err()); + let output = wrapped_output.unwrap_err().to_string(); + assert_eq!( + output, + "Could not generate onix_3.0::project_muse: No BIC or BISAC subject code".to_string() + ); - // Remove the only publication, which is the PDF - // Result: error (can't generate OAPEN ONIX without PDF URL) + // Reinstate the BIC subject but remove the only publication, which is the PDF + // Result: error (can't generate Project MUSE ONIX without PDF URL) + test_work.subjects = vec![WorkSubjects { + subject_code: "AAB".to_string(), + subject_type: SubjectType::BIC, + subject_ordinal: 1, + }]; test_work.publications.clear(); // Can't use helper function for this as it assumes Ok rather than Err let mut buffer = Vec::new();