Skip to content

Commit

Permalink
Merge pull request #1032 from Gluejar/maintenance-2024
Browse files Browse the repository at this point in the history
LT key, several omp sites
  • Loading branch information
eshellman authored Mar 4, 2024
2 parents 28051bd + b67945f commit 0ecccd8
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 4 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,12 @@ MARC Records
* if you have records with both DIRECT and UNGLUE links, you'll need two MARCRecord instances
* if you have both kinds of link, put them in _separate_ records, as marc_format can only take one value

MySQL Migration
---------------

## 5.7 - 8.0 Notes

* Many migration blockers were removed by dumping, then restoring the database.
* After that, RDS was able to migrate
* needed to create the unglueit user from the mysql client

2 changes: 1 addition & 1 deletion core/bookloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ def thingisbn(isbn):
which come back as isbn_13')
"""
logger.info(u"looking up %s at ThingISBN", isbn)
url = "https://www.librarything.com/api/thingISBN/%s" % isbn
url = f"https://www.librarything.com/api/{settings.LIBRARYTHING_KEY}/thingISBN/{isbn}"
xml = requests.get(url, headers={"User-Agent": settings.USER_AGENT}).content
try:
doc = ElementTree.fromstring(xml)
Expand Down
4 changes: 3 additions & 1 deletion core/loaders/doab_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def doab_lang_to_iso_639_1(lang):
'doc': 'http://www.lyncode.com/xoai'
}
)

STOREPROVIDERS = [
'7switch.com',
'amazon.ca',
Expand All @@ -67,6 +66,7 @@ def doab_lang_to_iso_639_1(lang):
'apress.com',
'bloomsbury.com',
'bod.de',
'booksdirect.co.za',
'cabi.org',
'cdcshoppingcart.uchicago.edu',
'checkout.sas.ac.uk',
Expand All @@ -76,11 +76,13 @@ def doab_lang_to_iso_639_1(lang):
'edicions.ub.edu',
'epubli.de',
'eurekaselect.com',
'fondazionecafoscari.storeden.com',
'global.oup.com',
'iospress.nl',
'karolinum.cz',
'librumstore.com',
'logos-verlag.de',
'manchesteruniversitypress.co.uk',
'mitpress.mit.edu',
'munishop.muni.cz',
'nomos-shop.de',
Expand Down
11 changes: 9 additions & 2 deletions core/loaders/harvest.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def clean_archive(ebf):
'editorial.inudi.edu.pe',
'editorial.ucatolicaluisamigo.edu.co',
'editorial.uniagustiniana.edu.co',
'editorialgrupo-aea.com',
'fcjp.derecho.unap.edu.pe',
'fedoabooks.unina.it',
'humanities-digital-library.org',
Expand All @@ -127,6 +128,7 @@ def clean_archive(ebf):
'Scholars Portal',
'monographs.uc.pt',
'omp.ub.rub.de',
'openuctpress.uct.ac.za',
'omp.zrc-sazu.si',
'openpress.mtsu.edu',
'teiresias-supplements.mcgill.ca',
Expand Down Expand Up @@ -179,6 +181,7 @@ def harvesters(ebook):
yield ebook.provider == 'press.ucalgary.ca', harvest_calgary
yield ebook.provider in ['Ledizioni', 'bibsciences.org',
'heiup.uni-heidelberg.de', 'e-archivo.uc3m.es'], harvest_generic
yield ebook.provider in ['funlam.edu.co'], harvest_generic_chrome
yield ebook.provider == 'muse.jhu.edu', harvest_muse
yield ebook.provider == 'direct.mit.edu', harvest_mitpress
yield ebook.provider == 'IOS Press Ebooks', harvest_ios
Expand Down Expand Up @@ -291,7 +294,9 @@ def redirect_ebook(ebook):
elif r.status_code == 200:
if ebook.url != r.url:
if models.Ebook.objects.exclude(id=ebook.id).filter(url=r.url).exists():
return models.Ebook.objects.filter(url=r.url)[0], 0
existing = models.Ebook.objects.filter(url=r.url)[0]
logger.error(f'ebook {ebook.id} redirects to existing {existing.id}')
return existing, 0
ebook.url = r.url
ebook.set_provider()
ebook.save()
Expand Down Expand Up @@ -359,6 +364,9 @@ def harvest_generic(ebook, user_agent=settings.USER_AGENT):
return set_bookshop(ebook)
return make_dl_ebook(ebook.url, ebook, user_agent=user_agent)

def harvest_generic_chrome(ebook):
    """Download the ebook at ``ebook.url`` using the Chrome user-agent string.

    Delegates to ``make_dl_ebook`` with ``settings.CHROME_UA`` as the
    ``user_agent`` and returns whatever ``make_dl_ebook`` returns.
    """
    target_url = ebook.url
    return make_dl_ebook(target_url, ebook, user_agent=settings.CHROME_UA)


def harvest_manual(ebook):
def make_manual_ebf(format):
Expand Down Expand Up @@ -520,7 +528,6 @@ def harvest_degruyter(ebook):
ebook, status = redirect_ebook(ebook)
if status < 1:
return None, -1 if status < 0 else 0

doc = get_soup(ebook.url, settings.GOOGLEBOT_UA)
if doc:
try:
Expand Down

0 comments on commit 0ecccd8

Please sign in to comment.