From f4b7d3917984e248e992edab1c355f67738c11c8 Mon Sep 17 00:00:00 2001
From: Carolyn Cole <cac9@princeton.edu>
Date: Tue, 15 Oct 2024 08:17:13 -0400
Subject: [PATCH] Add in a retry for urls that do not succeed. Adding an open
 timeout and upping the read timeout

---
 app/lib/describe_indexer.rb | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/app/lib/describe_indexer.rb b/app/lib/describe_indexer.rb
index 4c0de763..8827c120 100644
--- a/app/lib/describe_indexer.rb
+++ b/app/lib/describe_indexer.rb
@@ -82,14 +82,27 @@ def rss_url_list
   ##
   # Parse the rss_url, get a JSON resource url for each item, convert it to XML, and pass it to traject
   def perform_indexing
+    urls_to_retry = []
     rss_url_list.each do |url|
-      resource_json = URI.open(url, read_timeout: 30).read
-      resource_xml = prep_for_indexing(resource_json)
-      traject_indexer.process(resource_xml)
-      Rails.logger.info "Successfully imported record from #{url}."
+      process_url(url)
+    rescue
+      urls_to_retry << url
+    end
+
+    # Retry any errored urls a second time and send an error only if they fail again
+    urls_to_retry.each do |url|
+      process_url(url)
     rescue => ex
       Rails.logger.warn "Error importing record from #{url}. Exception: #{ex.message}"
       Honeybadger.notify "Error importing record from #{url}. Exception: #{ex.message}"
     end
   end
+
+  def process_url(url)
+    uri = URI.open(url, open_timeout: 30, read_timeout: 30)
+    resource_json = uri.read
+    resource_xml = prep_for_indexing(resource_json)
+    traject_indexer.process(resource_xml)
+    Rails.logger.info "Successfully imported record from #{url}."
+  end
 end