From eded7287d73db2add70825b3552fecc7d06981fc Mon Sep 17 00:00:00 2001 From: eric Date: Mon, 4 Mar 2024 15:24:37 -0500 Subject: [PATCH 1/2] february cleanup --- README.md | 8 ++++++++ core/loaders/doab_utils.py | 4 +++- core/loaders/harvest.py | 11 +++++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ac2d5d48..73a2dca0 100644 --- a/README.md +++ b/README.md @@ -127,4 +127,12 @@ MARC Records * if you have records with both DIRECT and UNGLUE links, you'll need two MARCRecord instances * if you have both kinds of link, put them in _separate_ records, as marc_format can only take one value +MySQL Migration +--------------- + +## 5.7 - 8.0 Notes + +* Many migration blockers were removed by dumping, then restoring the database. +* After that, RDS was able to migrate +* needed to create the unglueit user from the mysql client diff --git a/core/loaders/doab_utils.py b/core/loaders/doab_utils.py index f9407f16..37281a56 100644 --- a/core/loaders/doab_utils.py +++ b/core/loaders/doab_utils.py @@ -56,7 +56,6 @@ def doab_lang_to_iso_639_1(lang): 'doc': 'http://www.lyncode.com/xoai' } ) - STOREPROVIDERS = [ '7switch.com', 'amazon.ca', @@ -67,6 +66,7 @@ def doab_lang_to_iso_639_1(lang): 'apress.com', 'bloomsbury.com', 'bod.de', + 'booksdirect.co.za', 'cabi.org', 'cdcshoppingcart.uchicago.edu', 'checkout.sas.ac.uk', @@ -76,11 +76,13 @@ def doab_lang_to_iso_639_1(lang): 'edicions.ub.edu', 'epubli.de', 'eurekaselect.com', + 'fondazionecafoscari.storeden.com', 'global.oup.com', 'iospress.nl', 'karolinum.cz', 'librumstore.com', 'logos-verlag.de', + 'manchesteruniversitypress.co.uk', 'mitpress.mit.edu', 'munishop.muni.cz', 'nomos-shop.de', diff --git a/core/loaders/harvest.py b/core/loaders/harvest.py index 41e67600..c6e31ea9 100644 --- a/core/loaders/harvest.py +++ b/core/loaders/harvest.py @@ -118,6 +118,7 @@ def clean_archive(ebf): 'editorial.inudi.edu.pe', 'editorial.ucatolicaluisamigo.edu.co', 'editorial.uniagustiniana.edu.co', 
+ 'editorialgrupo-aea.com', 'fcjp.derecho.unap.edu.pe', 'fedoabooks.unina.it', 'humanities-digital-library.org', @@ -127,6 +128,7 @@ def clean_archive(ebf): 'Scholars Portal', 'monographs.uc.pt', 'omp.ub.rub.de', + 'openuctpress.uct.ac.za', 'omp.zrc-sazu.si', 'openpress.mtsu.edu', 'teiresias-supplements.mcgill.ca', @@ -179,6 +181,7 @@ def harvesters(ebook): yield ebook.provider == 'press.ucalgary.ca', harvest_calgary yield ebook.provider in ['Ledizioni', 'bibsciences.org', 'heiup.uni-heidelberg.de', 'e-archivo.uc3m.es'], harvest_generic + yield ebook.provider in ['funlam.edu.co'], harvest_generic_chrome yield ebook.provider == 'muse.jhu.edu', harvest_muse yield ebook.provider == 'direct.mit.edu', harvest_mitpress yield ebook.provider == 'IOS Press Ebooks', harvest_ios @@ -291,7 +294,9 @@ def redirect_ebook(ebook): elif r.status_code == 200: if ebook.url != r.url: if models.Ebook.objects.exclude(id=ebook.id).filter(url=r.url).exists(): - return models.Ebook.objects.filter(url=r.url)[0], 0 + existing = models.Ebook.objects.filter(url=r.url)[0] + logger.error(f'ebook {ebook.id} redirects to existing {existing.id}') + return existing, 0 ebook.url = r.url ebook.set_provider() ebook.save() @@ -359,6 +364,9 @@ def harvest_generic(ebook, user_agent=settings.USER_AGENT): return set_bookshop(ebook) return make_dl_ebook(ebook.url, ebook, user_agent=user_agent) +def harvest_generic_chrome(ebook, ): + return make_dl_ebook(ebook.url, ebook, user_agent=settings.CHROME_UA) + def harvest_manual(ebook): def make_manual_ebf(format): @@ -520,7 +528,6 @@ def harvest_degruyter(ebook): ebook, status = redirect_ebook(ebook) if status < 1: return None, -1 if status < 0 else 0 - doc = get_soup(ebook.url, settings.GOOGLEBOT_UA) if doc: try: From b67945f13e0b7a0e8bff8c11a7a2996907343941 Mon Sep 17 00:00:00 2001 From: eric Date: Mon, 4 Mar 2024 17:01:30 -0500 Subject: [PATCH 2/2] add librarything key --- core/bookloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/core/bookloader.py b/core/bookloader.py index b610ab31..73f31db6 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -527,7 +527,7 @@ def thingisbn(isbn): which come back as isbn_13') """ logger.info(u"looking up %s at ThingISBN", isbn) - url = "https://www.librarything.com/api/thingISBN/%s" % isbn + url = f"https://www.librarything.com/api/{settings.LIBRARYTHING_KEY}/thingISBN/{isbn}" xml = requests.get(url, headers={"User-Agent": settings.USER_AGENT}).content try: doc = ElementTree.fromstring(xml)