Skip to content

Commit

Permalink
Merge pull request #71 from mlibrary/tidy-2024-03
Browse files Browse the repository at this point in the history
Tidy to enable splitting and filtering zephir JSON
  • Loading branch information
niquerio authored Mar 20, 2024
2 parents f2a47f9 + 6075fd4 commit acf606b
Show file tree
Hide file tree
Showing 23 changed files with 271 additions and 288 deletions.
42 changes: 23 additions & 19 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,36 +1,40 @@
data
start_solr.sh
stop_solr.sh
.local
.solargraph.yml
.idea
.cache
.irb_history
.byebug_history
ht_secure_data.rb
logs
overlap/overlap_umich.tsv
tmp/
lib/translation_maps/hlb.json.gz
lib/translation_maps/umich/libLocInfo.yaml
.bash_history
.env
.bundle
/umich_catalog_indexing/.gem
/umich_catalog_indexing/.env
/umich_catalog_indexing/debug*
/umich_catalog_indexing/.m2
/umich_catalog_indexing/.ssh/*
.gem

umich_catalog_indexing/debug*
umich_catalog_indexing/.m2
umich_catalog_indexing/.ssh/*
umich_catalog_indexing/coverage/

umich_catalog_indexing/lib/translation_maps/hlb.json.gz
umich_catalog_indexing/lib/translation_maps/umich/libLocInfo.yaml

umich_catalog_indexing/scratch/*
!umich_catalog_indexing/scratch/.keep

umich_catalog_indexing/examples/*.xml
umich_catalog_indexing/examples/*/*.xml
umich_catalog_indexing/examples/*.tar.gz
umich_catalog_indexing/examples/*/*.tar.gz

/sftp/ssh/*
!/sftp/ssh/README.md
!/sftp/ssh/.keep
/sftp/search_daily_bibs/*.xml
/umich_catalog_indexing/*.xml
/umich_catalog_indexing/*/*.xml
/umich_catalog_indexing/*/*/*.xml
*.tar.gz
/sftp/search_daily_bibs/*.tar.gz
!sftp/search_daily_bibs/sample.tar.gz
!sftp/search_daily_bibs/birds_2022021017_21131448650006381_new.tar.gz

support_dbs/scratch/*
!support_dbs/scratch/.keep

biblio/biblio.zip

overlap/*.tsv
22 changes: 9 additions & 13 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,6 @@ services:
- ./umich_catalog_indexing/.:/app
- ./sftp/ssh/ssh_client_rsa_key:/etc/secret-volume/id_rsa:ro
- gem_cache:/gems
environment:
- REDIS_URL=redis://redis:6379
- HLB_XML_ENDPOINT=https://apps.lib.umich.edu/browse/categories/xml.php
- NODB=1
env_file:
- ./umich_catalog_indexing/.env
- ./umich_catalog_indexing/env.development
Expand Down Expand Up @@ -109,15 +105,15 @@ services:
#- MARIADB_PASSWORD=password
#- MARIADB_DATABASE=hathifiles

#hathioverlap:
#image: mariadb
#volumes:
#- overlap:/var/lib/mysql
#environment:
#- MARIADB_ROOT_PASSWORD=pass
#- MARIADB_USER=user
#- MARIADB_PASSWORD=password
#- MARIADB_DATABASE=overlap
hathioverlap:
build: overlap/.
volumes:
- overlap:/var/lib/mysql
environment:
- MARIADB_ROOT_PASSWORD=pass
- MARIADB_USER=user
- MARIADB_PASSWORD=password
- MARIADB_DATABASE=overlap

prometheus:
image: prom/prometheus
Expand Down
File renamed without changes.
15 changes: 2 additions & 13 deletions umich_catalog_indexing/env.development
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
HATHIFILE_HOST=hathidb
HATHIFILE_DB=hathifiles
HATHIFILE_USER=root
HATHIFILE_PASSWORD=pass
HATHI_OVERLAP_HOST=hathioverlap
HATHI_OVERLAP_DB=bibliosearch
HATHI_OVERLAP_USER=root
HATHI_OVERLAP_PASSWORD=pass
ALMA_API_HOST=https://api-na.hosted.exlibrisgroup.com
ALMA_FILES_USER=alma
ALMA_FILES_HOST=sftp
SSH_KEY_PATH=/etc/secret-volume/id_rsa
DAILY_ALMA_FILES_PATH="search_daily_bibs"
MACC_PRODUCTION_SOLR_URL=http://solr:8983/solr/biblio
HATCHER_PRODUCTION_SOLR_URL=http://solr:8983/solr/biblio
LIVE_SOLR_URL=http://solr:8983/solr/biblio
REINDEX_SOLR_URL=http://solr:8983/solr/biblio
SIDEKIQ_SUPERVISOR_HOST=http://supervisor:3000
SOLR_USER=solr
SOLR_PASSWORD=SolrRocks
SOLRCLOUD_ON=true
PROMETHEUS_PUSH_GATEWAY=http://pushgateway:9091
HLB_XML_ENDPOINT=https://apps.lib.umich.edu/browse/categories/xml.php
REDIS_URL=redis://redis:6379
33 changes: 16 additions & 17 deletions umich_catalog_indexing/indexers/settings.rb
Original file line number Diff line number Diff line change
@@ -1,36 +1,36 @@
$:.unshift "#{File.dirname(__FILE__)}/../lib"
require 'set'
require "services"
require "set"

require 'library_stdnums'
require "library_stdnums"

require 'traject/macros/marc21_semantics'
require "traject/macros/marc21_semantics"
extend Traject::Macros::Marc21Semantics

require 'traject/macros/marc_format_classifier'
require "traject/macros/marc_format_classifier"
extend Traject::Macros::MarcFormats

require 'ht_traject'
require "ht_traject"
extend HathiTrust::Traject::Macros
extend Traject::UMichFormat::Macros

require 'marc/fastxmlwriter'
require "marc/fastxmlwriter"

require 'marc_record_speed_monkeypatch'
require 'marc4j_fix'
require "marc_record_speed_monkeypatch"
require "marc4j_fix"

UmichOverlap = if ENV['NODB']
require "ht_traject/no_db_mocks/ht_overlap"
HathiTrust::NoDB::UmichOverlap
else
require 'ht_traject/ht_overlap.rb'
HathiTrust::UmichOverlap
end
UmichOverlap = if S.no_db?
require "ht_traject/no_db_mocks/ht_overlap"
HathiTrust::NoDB::UmichOverlap
else
require "ht_traject/ht_overlap"
HathiTrust::UmichOverlap
end

settings do
store "log.batch_progress", 10_000
end


logger.info RUBY_DESCRIPTION

################################
Expand All @@ -39,4 +39,3 @@

# Set up an area in the clipboard for use storing intermediate stuff
each_record HathiTrust::Traject::Macros.setup

Loading

0 comments on commit acf606b

Please sign in to comment.