diff --git a/.gitignore b/.gitignore index 806b4a4c..d54317d8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ stop_solr.sh .idea .cache .irb_history +.byebug_history ht_secure_data.rb logs overlap/overlap_umich.tsv @@ -20,6 +21,7 @@ lib/translation_maps/umich/libLocInfo.yaml /umich_catalog_indexing/debug* /umich_catalog_indexing/.m2 /umich_catalog_indexing/.ssh/* +umich_catalog_indexing/coverage/ /sftp/ssh/* !/sftp/ssh/README.md /sftp/search_daily_bibs/*.xml @@ -31,3 +33,4 @@ lib/translation_maps/umich/libLocInfo.yaml !sftp/search_daily_bibs/birds_2022021017_21131448650006381_new.tar.gz support_dbs/scratch/* !support_dbs/scratch/.keep +biblio/biblio.zip diff --git a/umich_catalog_indexing/Gemfile b/umich_catalog_indexing/Gemfile index d5737d13..9ba97c77 100644 --- a/umich_catalog_indexing/Gemfile +++ b/umich_catalog_indexing/Gemfile @@ -7,6 +7,7 @@ group :development do gem "rspec", "~>3.0" gem "webmock", "~>3.0" gem "standard" + gem "simplecov" end gem "yell", "~>2.0" diff --git a/umich_catalog_indexing/Gemfile.lock b/umich_catalog_indexing/Gemfile.lock index f67a6c2f..81b50bea 100644 --- a/umich_catalog_indexing/Gemfile.lock +++ b/umich_catalog_indexing/Gemfile.lock @@ -48,6 +48,7 @@ GEM crack (0.4.5) rexml diff-lcs (1.5.0) + docile (1.4.0) domain_name (0.6.20240107) dot-properties (0.1.4) bundler (>= 2.2.33) @@ -177,6 +178,12 @@ GEM connection_pool (>= 2.3.0) rack (>= 2.2.4) redis-client (>= 0.14.0) + simplecov (0.22.0) + docile (~> 1.1) + simplecov-html (~> 0.11) + simplecov_json_formatter (~> 0.1) + simplecov-html (0.12.3) + simplecov_json_formatter (0.1.4) slop (4.10.1) standard (1.33.0) language_server-protocol (~> 3.17.0.2) @@ -248,6 +255,7 @@ DEPENDENCIES sequel (~> 5.0) sftp! sidekiq + simplecov standard traject (~> 3.0, >= 3.8.2) traject-marc4j_reader (~> 1.0) diff --git a/umich_catalog_indexing/lib/jobs/utilities/translation_map_fetcher.rb b/umich_catalog_indexing/lib/jobs/utilities/translation_map_fetcher.rb index 44a17ebf..4c155c69 100644 --- a/umich_catalog_indexing/lib/jobs/utilities/translation_map_fetcher.rb +++ b/umich_catalog_indexing/lib/jobs/utilities/translation_map_fetcher.rb @@ -2,8 +2,17 @@ module Jobs module Utilities class TranslationMapFetcher - def initialize(logger = S.logger) - @logger = logger + def initialize( + lib_loc_info_klass: Jobs::LibLocInfo, + electronic_collections_klass: Jobs::ElectronicCollections, + high_level_browse_klass: HighLevelBrowse, + translation_map_dir: "/app/lib/translation_maps" + ) + @logger = S.logger + @lib_loc_info_klass = lib_loc_info_klass + @electronic_collections_klass = electronic_collections_klass + @high_level_browse_klass = high_level_browse_klass + @translation_map_dir = translation_map_dir end def run @@ -19,7 +28,7 @@ def run def fetch_high_level_browse if should_fetch?(hlb_file) - HighLevelBrowse.fetch_and_save(dir: hlb_dir) + @high_level_browse_klass.fetch_and_save(dir: hlb_dir) @logger.info "updated #{hlb_file}" else @logger.info "#{hlb_file} is less than one day old. Did not update" @@ -27,11 +36,11 @@ def fetch_high_level_browse end def fetch_lib_loc_info - fetch_translation_map(path: lib_loc_info_file, fetcher: lambda { Jobs::LibLocInfo.generate_translation_map }) + fetch_translation_map(path: lib_loc_info_file, fetcher: lambda { @lib_loc_info_klass.generate_translation_map }) end def fetch_electronic_collections - fetch_translation_map(path: electronic_collection_file, fetcher: lambda { Jobs::ElectronicCollections.generate_translation_map }) + fetch_translation_map(path: electronic_collection_file, fetcher: lambda { @electronic_collections_klass.generate_translation_map }) end # @param path [String] [path to where the translation map should be saved] @@ -41,12 +50,10 @@ def fetch_translation_map(path:, fetcher:) if should_fetch?(path) temporary_path = "#{path}_#{SecureRandom.alphanumeric(8)}.temporary" File.write(temporary_path, fetcher.call) - if !File.exist?(temporary_path) || File.size?(temporary_path) < 15 - @logger.error "Did not update #{path}. Failed to load file" - else - File.rename(temporary_path, path) - @logger.info "updated #{path}" - end + raise StandardError, "#{temporary_path} does not exist; Failed to load file" if !File.exist?(temporary_path) + raise StandardError, "#{temporary_path} is too small; Failed to load file" if File.size?(temporary_path) < 15 + File.rename(temporary_path, path) + @logger.info "updated #{path}" else @logger.info "#{path} is less than one day old. Did not update" end @@ -59,19 +66,19 @@ def should_fetch?(file) end def hlb_dir - "/app/lib/translation_maps" + @translation_map_dir end def hlb_file - "#{hlb_dir}/hlb.json.gz" + "#{@translation_map_dir}/hlb.json.gz" end def lib_loc_info_file - "/app/lib/translation_maps/umich/libLocInfo.yaml" + "#{@translation_map_dir}/umich/libLocInfo.yaml" end def electronic_collection_file - "/app/lib/translation_maps/umich/electronic_collections.yaml" + "#{@translation_map_dir}/umich/electronic_collections.yaml" end end end diff --git a/umich_catalog_indexing/lib/services.rb b/umich_catalog_indexing/lib/services.rb index 46424d04..0115d493 100644 --- a/umich_catalog_indexing/lib/services.rb +++ b/umich_catalog_indexing/lib/services.rb @@ -4,6 +4,10 @@ Services = Canister.new S = Services +S.register(:project_root) do + File.absolute_path(File.join(__dir__, "..")) +end + S.register(:log_stream) do $stdout.sync = true $stdout diff --git a/umich_catalog_indexing/spec/jobs/alma_file_processor_spec.rb b/umich_catalog_indexing/spec/jobs/utilities/alma_file_processor_spec.rb similarity index 83% rename from umich_catalog_indexing/spec/jobs/alma_file_processor_spec.rb rename to umich_catalog_indexing/spec/jobs/utilities/alma_file_processor_spec.rb index e270595b..137039d8 100644 --- a/umich_catalog_indexing/spec/jobs/alma_file_processor_spec.rb +++ b/umich_catalog_indexing/spec/jobs/utilities/alma_file_processor_spec.rb @@ -1,4 +1,4 @@ -require_relative '../spec_helper.rb' +require_relative "../../spec_helper" require "jobs" RSpec.describe Jobs::Utilities::AlmaFileProcessor do before(:each) do @@ -13,8 +13,8 @@ @mkdir_double = double("MkidrDouble", mkdir: "") @run_params = { sftp: instance_double(SFTP::Client, get: ""), - tar: lambda{|path, destination| @tar_double.exec(path, destination)}, - mkdir: lambda{|dir| @mkdir_double.mkdir(dir)} + tar: lambda { |path, destination| @tar_double.exec(path, destination) }, + mkdir: lambda { |dir| @mkdir_double.mkdir(dir) } } end it "calls sftp get function with path and destination" do @@ -29,7 +29,7 @@ expect(@mkdir_double).to receive(:mkdir).with("/app/scratch") subject.run(**@run_params) end - end + end context "#xml_file" do it "returns the appropriate filename" do expect(subject.xml_file).to eq("/app/scratch/file.xml") @@ -38,11 +38,11 @@ context "#clean" do before(:each) do @dir_delete_double = class_double(FileUtils, remove_dir: nil) - @delete = lambda{|file| @dir_delete_double.remove_dir(file)} + @delete = lambda { |file| @dir_delete_double.remove_dir(file) } end it "removes the files put in the scratch directory" do expect(@dir_delete_double).to receive(:remove_dir).with("/app/scratch") - subject.clean(@delete) + subject.clean(@delete) end end end diff --git a/umich_catalog_indexing/spec/jobs/utilities/translation_map_fetcher_spec.rb b/umich_catalog_indexing/spec/jobs/utilities/translation_map_fetcher_spec.rb new file mode 100644 index 00000000..0c3fc015 --- /dev/null +++ b/umich_catalog_indexing/spec/jobs/utilities/translation_map_fetcher_spec.rb @@ -0,0 +1,77 @@ +require_relative "../../spec_helper" +require "jobs" +require "securerandom" + +RSpec.describe Jobs::Utilities::TranslationMapFetcher do + before(:each) do + @tmp_dir = File.join(S.project_root, "tmp") + @umich_dir = File.join(@tmp_dir, "umich") + @hlb_path = File.join(@tmp_dir, "hlb.json.gz") + @lib_loc_info_path = File.join(@umich_dir, "libLocInfo.yaml") + @electronic_collections_path = File.join(@umich_dir, "electronic_collections.yaml") + Dir.mkdir(@tmp_dir) unless File.exist?(@tmp_dir) + Dir.mkdir(@umich_dir) unless File.exist?(@umich_dir) + @params = { + lib_loc_info_klass: class_double(Jobs::LibLocInfo, generate_translation_map: string_of_size(20)), + electronic_collections_klass: class_double(Jobs::ElectronicCollections, generate_translation_map: string_of_size(20)), + high_level_browse_klass: class_double(HighLevelBrowse, fetch_and_save: nil), + translation_map_dir: @tmp_dir + } + end + after(:each) do + FileUtils.remove_dir(@tmp_dir, "true") + end + + def string_of_size(size) + SecureRandom.random_bytes(size) + end + + subject do + described_class.new(**@params) + end + + context "#run" do + context "empty translation map directory" do + it "generates translation maps" do + expect(File.exist?(@lib_loc_info_path)).to eq(false) + expect(File.exist?(@electronic_collections_path)).to eq(false) + subject.run + expect(File.exist?(@lib_loc_info_path)).to eq(true) + expect(File.exist?(@electronic_collections_path)).to eq(true) + expect(@params[:high_level_browse_klass]).to have_received(:fetch_and_save) + end + end + context "has new translation map files" do + it "does not generate new translation maps" do + `touch #{@lib_loc_info_path}` + `touch #{@electronic_collections_path}` + `touch #{@hlb_path}` + subject.run + expect(@params[:high_level_browse_klass]).not_to have_received(:fetch_and_save) + expect(@params[:lib_loc_info_klass]).not_to have_received(:generate_translation_map) + expect(@params[:electronic_collections_klass]).not_to have_received(:generate_translation_map) + end + end + context "has old translation map files" do + it "generates new files" do + `touch -d "-2 days" #{@lib_loc_info_path} ` + `touch -d "-2 days" #{@electronic_collections_path}` + `touch -d "-2 days" #{@hlb_path}` + subject.run + expect(@params[:high_level_browse_klass]).to have_received(:fetch_and_save) + # expect(@params[:lib_loc_info_klass]).to have_received(:generate_translation_map) + expect(@params[:electronic_collections_klass]).to have_received(:generate_translation_map) + end + end + context "fails to generate big enough files" do + it "errors out for too small lib_loc_info" do + allow(@params[:lib_loc_info_klass]).to receive(:generate_translation_map).and_return(string_of_size(2)) + expect { subject.run }.to raise_error(StandardError) + end + it "errors out for too small electronic_collections_file" do + allow(@params[:electronic_collections_klass]).to receive(:generate_translation_map).and_return(string_of_size(2)) + expect { subject.run }.to raise_error(StandardError) + end + end + end +end diff --git a/umich_catalog_indexing/spec/spec_helper.rb b/umich_catalog_indexing/spec/spec_helper.rb index 280e3da3..37c02f43 100644 --- a/umich_catalog_indexing/spec/spec_helper.rb +++ b/umich_catalog_indexing/spec/spec_helper.rb @@ -1,5 +1,9 @@ require "webmock/rspec" require "alma_rest_client" +require "byebug" +require "simplecov" +SimpleCov.start +ENV["APP_ENV"] = "test" # This file was generated by the `rspec --init` command. Conventionally, all # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. @@ -48,77 +52,75 @@ # triggering implicit auto-inclusion in groups with matching metadata. config.shared_context_metadata_behavior = :apply_to_host_groups -# The settings below are suggested to provide a good initial experience -# with RSpec, but feel free to customize to your heart's content. -=begin - # This allows you to limit a spec run to individual examples or groups - # you care about by tagging them with `:focus` metadata. When nothing - # is tagged with `:focus`, all examples get run. RSpec also provides - # aliases for `it`, `describe`, and `context` that include `:focus` - # metadata: `fit`, `fdescribe` and `fcontext`, respectively. - config.filter_run_when_matching :focus - - # Allows RSpec to persist some state between runs in order to support - # the `--only-failures` and `--next-failure` CLI options. We recommend - # you configure your source control system to ignore this file. - config.example_status_persistence_file_path = "spec/examples.txt" - - # Limits the available syntax to the non-monkey patched syntax that is - # recommended. For more details, see: - # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/ - # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ - # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode - config.disable_monkey_patching! - - # This setting enables warnings. It's recommended, but in some cases may - # be too noisy due to issues in dependencies. - config.warnings = true - - # Many RSpec users commonly either run the entire suite or an individual - # file, and it's useful to allow more verbose output when running an - # individual spec file. - if config.files_to_run.one? - # Use the documentation formatter for detailed output, - # unless a formatter has already been configured - # (e.g. via a command-line flag). - config.default_formatter = "doc" - end - - # Print the 10 slowest examples and example groups at the - # end of the spec run, to help surface which specs are running - # particularly slow. - config.profile_examples = 10 - - # Run specs in random order to surface order dependencies. If you find an - # order dependency and want to debug it, you can fix the order by providing - # the seed, which is printed after each run. - # --seed 1234 - config.order = :random - - # Seed global randomization in this process using the `--seed` CLI option. - # Setting this allows you to use `--seed` to deterministically reproduce - # test failures related to randomization by passing the same `--seed` value - # as the one that triggered the failure. - Kernel.srand config.seed -=end + # The settings below are suggested to provide a good initial experience + # with RSpec, but feel free to customize to your heart's content. + # # This allows you to limit a spec run to individual examples or groups + # # you care about by tagging them with `:focus` metadata. When nothing + # # is tagged with `:focus`, all examples get run. RSpec also provides + # # aliases for `it`, `describe`, and `context` that include `:focus` + # # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + # config.filter_run_when_matching :focus + # + # # Allows RSpec to persist some state between runs in order to support + # # the `--only-failures` and `--next-failure` CLI options. We recommend + # # you configure your source control system to ignore this file. + # config.example_status_persistence_file_path = "spec/examples.txt" + # + # # Limits the available syntax to the non-monkey patched syntax that is + # # recommended. For more details, see: + # # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/ + # # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ + # # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode + # config.disable_monkey_patching! + # + # # This setting enables warnings. It's recommended, but in some cases may + # # be too noisy due to issues in dependencies. + # config.warnings = true + # + # # Many RSpec users commonly either run the entire suite or an individual + # # file, and it's useful to allow more verbose output when running an + # # individual spec file. + # if config.files_to_run.one? + # # Use the documentation formatter for detailed output, + # # unless a formatter has already been configured + # # (e.g. via a command-line flag). + # config.default_formatter = "doc" + # end + # + # # Print the 10 slowest examples and example groups at the + # # end of the spec run, to help surface which specs are running + # # particularly slow. + # config.profile_examples = 10 + # + # # Run specs in random order to surface order dependencies. If you find an + # # order dependency and want to debug it, you can fix the order by providing + # # the seed, which is printed after each run. + # # --seed 1234 + # config.order = :random + # + # # Seed global randomization in this process using the `--seed` CLI option. + # # Setting this allows you to use `--seed` to deterministically reproduce + # # test failures related to randomization by passing the same `--seed` value + # # as the one that triggered the failure. + # Kernel.srand config.seed end -#[:get, :post,:put, :delete].each do |name| - #define_method("stub_alma_#{name}_request") do |url:, input: nil, output: "", status: 200, query: nil| - #req_attributes = Hash.new - #req_attributes[:headers] = { - #accept: 'application/json', - #Authorization: "apikey #{ENV['ALMA_API_KEY']}", - #'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - #'User-Agent'=>'Ruby', - #'Content-Type' => 'application/json' - #} - #req_attributes[:body] = input unless input.nil? - #req_attributes[:query] = query unless query.nil? - #resp = { headers: {content_type: 'application/json'}, status: status, body: output } +# [:get, :post,:put, :delete].each do |name| +# define_method("stub_alma_#{name}_request") do |url:, input: nil, output: "", status: 200, query: nil| +# req_attributes = Hash.new +# req_attributes[:headers] = { +# accept: 'application/json', +# Authorization: "apikey #{ENV['ALMA_API_KEY']}", +# 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', +# 'User-Agent'=>'Ruby', +# 'Content-Type' => 'application/json' +# } +# req_attributes[:body] = input unless input.nil? +# req_attributes[:query] = query unless query.nil? +# resp = { headers: {content_type: 'application/json'}, status: status, body: output } - #stub_request(name, "#{ENV["ALMA_API_HOST"]}/almaws/v1/#{url}").with( **req_attributes).to_return(**resp) - #end -#end +# stub_request(name, "#{ENV["ALMA_API_HOST"]}/almaws/v1/#{url}").with( **req_attributes).to_return(**resp) +# end +# end def fixture(path) File.read("./spec/fixtures/#{path}") end