From c987dcb30892c034797997d177b41a1ccf1e9687 Mon Sep 17 00:00:00 2001 From: Jim Bailey Date: Thu, 27 Jul 2023 17:37:57 +0100 Subject: [PATCH] fix: Change the folder structure for the reporting bucket We are moving to a new analytical platform that requires a certain folder structure for the data. [MAP-19] --- app/lib/cloud_data/reports_feed.rb | 21 ++++-- app/workers/feeds/feed_worker.rb | 2 +- app/workers/gps_report_worker.rb | 16 +++-- .../hmpps-book-secure-move-api/values.yaml | 1 + spec/lib/cloud_data/reports_feed_spec.rb | 71 ++++++++++++++++--- spec/workers/feeds/feed_worker_spec.rb | 2 +- spec/workers/gps_report_worker_spec.rb | 60 ++++++++++++---- 7 files changed, 139 insertions(+), 34 deletions(-) diff --git a/app/lib/cloud_data/reports_feed.rb b/app/lib/cloud_data/reports_feed.rb index 9821cafad..466bccea0 100644 --- a/app/lib/cloud_data/reports_feed.rb +++ b/app/lib/cloud_data/reports_feed.rb @@ -1,5 +1,7 @@ module CloudData class ReportsFeed + DATABASE_NAME = 'feeds_report'.freeze + def initialize(bucket_name = ENV['S3_REPORTING_BUCKET_NAME']) client = Aws::S3::Client.new( access_key_id: ENV['S3_REPORTING_ACCESS_KEY_ID'], @@ -9,19 +11,26 @@ def initialize(bucket_name = ENV['S3_REPORTING_BUCKET_NAME']) @bucket = @s3.bucket(bucket_name) end - def write(content, obj_name, report_date = Time.zone.yesterday) - folder_name = report_date.strftime('%Y/%m/%d') - filename = report_date.strftime('%Y-%m-%d') + def write(content, table, report_date = Time.zone.yesterday) + path = full_path(report_date, table) - full_name = "#{folder_name}/#{filename}-#{obj_name}" - obj = bucket.object(full_name) + obj = bucket.object(path) obj.put(body: content) - full_name + path end private attr_reader :bucket + + def full_path(report_date, table) + "#{ENV.fetch('S3_REPORTING_PROJECT_PATH')}/" \ + 'data/' \ + "database_name=#{DATABASE_NAME}/" \ + "table_name=#{table}/" \ + "extraction_timestamp=#{report_date.strftime('%Y%m%d%H%M%SZ')}/" \ + "#{report_date.strftime('%Y-%m-%d')}-#{table}.jsonl" + end end end diff --git a/app/workers/feeds/feed_worker.rb b/app/workers/feeds/feed_worker.rb index dfcd5d111..9aa785bea 100644 --- a/app/workers/feeds/feed_worker.rb +++ b/app/workers/feeds/feed_worker.rb @@ -10,7 +10,7 @@ def perform(feed_name, date) time_since = TimeSince.new feed = "Feeds::#{feed_name.titleize}".constantize.new(date.beginning_of_day, date.end_of_day).call - CloudData::ReportsFeed.new.write(feed, "#{feed_name.pluralize}.jsonl", date) + CloudData::ReportsFeed.new.write(feed, feed_name.pluralize, date) Sidekiq.logger.info("Generated #{feed_name} feed in #{time_since.get} seconds") end diff --git a/app/workers/gps_report_worker.rb b/app/workers/gps_report_worker.rb index 69c581d1a..a20d86473 100644 --- a/app/workers/gps_report_worker.rb +++ b/app/workers/gps_report_worker.rb @@ -14,6 +14,7 @@ def perform private + DATABASE_NAME = 'gps_report' SUPPLIERS = %w[geoamey serco].freeze def post_results(results) @@ -55,16 +56,21 @@ def failure_csv_content(failures) def write_s3(content, supplier_name) return if content.blank? - folder_name = @date_range.first.strftime('%Y/%m/%d') - filename = "#{@date_range.first.strftime('%Y-%m-%d')}-#{@date_range.last.strftime('%Y-%m-%d')}" - - full_name = "#{folder_name}/#{filename}-#{supplier_name}.csv" - obj = bucket.object(full_name) + obj = bucket.object(full_path(supplier_name)) obj.put(body: content) obj end + def full_path(supplier_name) + "#{ENV.fetch('S3_REPORTING_PROJECT_PATH')}/" \ + 'data/' \ + "database_name=#{DATABASE_NAME}/" \ + "table_name=gps_reports_#{supplier_name}/" \ + "extraction_timestamp=#{@date_range.first.strftime('%Y%m%d%H%M%SZ')}/" \ + "#{@date_range.first.strftime('%Y-%m-%d')}-#{@date_range.last.strftime('%Y-%m-%d')}-gps-report.csv" + end + def bucket return @bucket if @bucket.present? diff --git a/helm_deploy/hmpps-book-secure-move-api/values.yaml b/helm_deploy/hmpps-book-secure-move-api/values.yaml index c0f4f9c7b..1b2cea641 100644 --- a/helm_deploy/hmpps-book-secure-move-api/values.yaml +++ b/helm_deploy/hmpps-book-secure-move-api/values.yaml @@ -38,6 +38,7 @@ generic-service: PROMETHEUS_METRICS: "on" RAILS_LOG_TO_STDOUT: "true" RAILS_MAX_THREADS: "5" + S3_REPORTING_PROJECT_PATH: landing/hmpps-book-secure-move-api SERVE_API_DOCS: "true" SIDEKIQ_PROCESSES: "3" diff --git a/spec/lib/cloud_data/reports_feed_spec.rb b/spec/lib/cloud_data/reports_feed_spec.rb index bb830e9e8..1b1ef0047 100644 --- a/spec/lib/cloud_data/reports_feed_spec.rb +++ b/spec/lib/cloud_data/reports_feed_spec.rb @@ -1,8 +1,34 @@ require 'rails_helper' RSpec.describe CloudData::ReportsFeed do + let(:s3_client) do + Aws::S3::Client.new( + access_key_id: s3_id, + secret_access_key: s3_key, + stub_responses: true, + ) + end + + let(:full_path) do + 'landing/hmpps-book-secure-move-api/data/' \ + 'database_name=feeds_report/' \ + 'table_name=moves/' \ + 'extraction_timestamp=20200129000000Z/' \ + '2020-01-29-moves.jsonl' + end + + let(:table) { 'moves' } + let(:content) { 'some content' } + let(:bucket_name) { 'bucket_name' } + let(:s3_id) { 'my_id' } + let(:s3_key) { 'token123' } + before do - Aws.config.update(stub_responses: true) # rubocop:disable Rails/SaveBang + allow(Aws::S3::Client).to receive(:new).with( + access_key_id: s3_id, + secret_access_key: s3_key, + ).and_return(s3_client) + Timecop.freeze(Time.zone.local(2020, 1, 30)) end @@ -10,21 +36,50 @@ Timecop.return end + around do |example| + ClimateControl.modify( + 'S3_REPORTING_ACCESS_KEY_ID' => s3_id, + 'S3_REPORTING_SECRET_ACCESS_KEY' => s3_key, + 'S3_REPORTING_PROJECT_PATH' => 'landing/hmpps-book-secure-move-api', + ) do + example.run + end + end + it 'writes a file on S3 and return the full_name' do - full_name = described_class.new('bucket_name') - .write('some content', 'report.json') + full_name = described_class.new(bucket_name) + .write(content, table) + + expect(full_name).to eq(full_path) + end + + it 'calls the S3 SDK with the correct data' do + described_class.new(bucket_name).write(content, table) - expect(full_name).to eq('2020/01/29/2020-01-29-report.json') + expect(s3_client.api_requests.first.slice(:operation_name, :params)).to eq({ + operation_name: :put_object, + params: { + body: content, + bucket: bucket_name, + key: full_path, + }, + }) end context 'when a report date is specified' do let(:report_date) { Date.new(2020, 1, 15) } - it 'creates a file and names it base on the report date ' do - full_name = described_class.new('bucket_name') - .write('some content', 'report.json', report_date) + it 'creates a file and names it based on the report date' do + full_name = described_class.new(bucket_name) + .write(content, table, report_date) - expect(full_name).to eq('2020/01/15/2020-01-15-report.json') + expect(full_name).to eq( + 'landing/hmpps-book-secure-move-api/data/' \ + 'database_name=feeds_report/' \ + 'table_name=moves/' \ + 'extraction_timestamp=20200115000000Z/' \ + '2020-01-15-moves.jsonl', + ) end end end diff --git a/spec/workers/feeds/feed_worker_spec.rb b/spec/workers/feeds/feed_worker_spec.rb index c173ea27a..25f6ad767 100644 --- a/spec/workers/feeds/feed_worker_spec.rb +++ b/spec/workers/feeds/feed_worker_spec.rb @@ -21,7 +21,7 @@ described_class.new.perform(feed_name, Date.yesterday.to_s) expect(feed_class).to have_received(:new).once.with(Date.yesterday.beginning_of_day, Date.yesterday.end_of_day) - expect(reports_feed_instance).to have_received(:write).once.with('feed_data', "#{feed_name.pluralize}.jsonl", Date.yesterday) + expect(reports_feed_instance).to have_received(:write).once.with('feed_data', feed_name.pluralize, Date.yesterday) end end end diff --git a/spec/workers/gps_report_worker_spec.rb b/spec/workers/gps_report_worker_spec.rb index 95b615b0e..31f48e622 100644 --- a/spec/workers/gps_report_worker_spec.rb +++ b/spec/workers/gps_report_worker_spec.rb @@ -9,12 +9,29 @@ serco: instance_double(GPSReport), } end + let(:slack_notifier) { instance_double(Slack::Notifier) } - let(:s3_object) { instance_double(Aws::S3::Object, presigned_url: 'aws_url') } + let(:s3_id) { 'my_id' } + let(:s3_key) { 'token123' } + + let(:s3_client) do + Aws::S3::Client.new( + access_key_id: s3_id, + secret_access_key: s3_key, + stub_responses: true, + ) + end around do |example| Timecop.freeze('2021-01-02 00:00:00') - example.run + ClimateControl.modify( + 'S3_REPORTING_ACCESS_KEY_ID' => s3_id, + 'S3_REPORTING_BUCKET_NAME' => 'moj-reg-dev', + 'S3_REPORTING_SECRET_ACCESS_KEY' => s3_key, + 'S3_REPORTING_PROJECT_PATH' => 'landing/hmpps-book-secure-move-api', + ) do + example.run + end Timecop.return end @@ -23,8 +40,10 @@ allow(GPSReport).to receive(:new).with(anything, 'serco').and_return(gps_reports[:serco]) allow(Slack::Notifier).to receive(:new).and_return(slack_notifier) allow(slack_notifier).to receive(:post) - allow(s3_object).to receive(:put) - allow(Aws::S3::Resource).to receive(:new).and_return(instance_double(Aws::S3::Resource, bucket: instance_double(Aws::S3::Bucket, object: s3_object))) + allow(Aws::S3::Client).to receive(:new).with( + access_key_id: s3_id, + secret_access_key: s3_key, + ).and_return(s3_client) end context 'when geoamey passes and serco fails' do @@ -56,19 +75,19 @@ }, failure_files: { geoamey: <<~STR, - reasons,no_journeys,no_gps_data - occurrences,1,2 + reasons,no_gps_data + occurrences,1 move ids - ,#{move.id},#{move.id} - ,,#{move.id} + ,#{move.id} STR serco: <<~STR, - reasons,no_gps_data - occurrences,1 + reasons,no_journeys,no_gps_data + occurrences,1,2 move ids - ,#{move.id} + ,#{move.id},#{move.id} + ,,#{move.id} STR }, } @@ -83,13 +102,28 @@ failures: [{ move: move, reason: 'no_journeys' }, { move: move, reason: 'no_gps_data' }, { move: move, reason: 'no_gps_data' }], move_count: 7, }) + allow_any_instance_of(Aws::S3::Object).to receive(:presigned_url).and_return('aws_url') # rubocop:disable RSpec/AnyInstance gps_report_worker.perform end it 'uploads the failure files to s3' do - expect(s3_object).to have_received(:put).with(body: test_data[:failure_files][:geoamey]) - expect(s3_object).to have_received(:put).with(body: test_data[:failure_files][:serco]) + expect(s3_client.api_requests.first.slice(:operation_name, :params)).to eq({ + operation_name: :put_object, + params: { + bucket: 'moj-reg-dev', + key: 'landing/hmpps-book-secure-move-api/data/database_name=gps_report/table_name=gps_reports_geoamey/extraction_timestamp=20201226000000Z/2020-12-26-2021-01-01-gps-report.csv', + body: test_data[:failure_files][:geoamey], + }, + }) + expect(s3_client.api_requests.second.slice(:operation_name, :params)).to eq( + operation_name: :put_object, + params: { + bucket: 'moj-reg-dev', + key: 'landing/hmpps-book-secure-move-api/data/database_name=gps_report/table_name=gps_reports_serco/extraction_timestamp=20201226000000Z/2020-12-26-2021-01-01-gps-report.csv', + body: test_data[:failure_files][:serco], + }, + ) end it 'posts the expected results message to slack' do