Skip to content

Commit

Permalink
Merge pull request #2120 from ministryofjustice/change-reporting-buck…
Browse files Browse the repository at this point in the history
…et-path--MAP-19

fix: Change the folder structure for the reporting bucket
  • Loading branch information
jimbali authored Jul 28, 2023
2 parents e46a3f7 + c987dcb commit d02ab3d
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 34 deletions.
21 changes: 15 additions & 6 deletions app/lib/cloud_data/reports_feed.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module CloudData
class ReportsFeed
DATABASE_NAME = 'feeds_report'.freeze

def initialize(bucket_name = ENV['S3_REPORTING_BUCKET_NAME'])
client = Aws::S3::Client.new(
access_key_id: ENV['S3_REPORTING_ACCESS_KEY_ID'],
Expand All @@ -9,19 +11,26 @@ def initialize(bucket_name = ENV['S3_REPORTING_BUCKET_NAME'])
@bucket = @s3.bucket(bucket_name)
end

def write(content, obj_name, report_date = Time.zone.yesterday)
folder_name = report_date.strftime('%Y/%m/%d')
filename = report_date.strftime('%Y-%m-%d')
def write(content, table, report_date = Time.zone.yesterday)
path = full_path(report_date, table)

full_name = "#{folder_name}/#{filename}-#{obj_name}"
obj = bucket.object(full_name)
obj = bucket.object(path)
obj.put(body: content)

full_name
path
end

private

attr_reader :bucket

def full_path(report_date, table)
"#{ENV.fetch('S3_REPORTING_PROJECT_PATH')}/" \
'data/' \
"database_name=#{DATABASE_NAME}/" \
"table_name=#{table}/" \
"extraction_timestamp=#{report_date.strftime('%Y%m%d%H%M%SZ')}/" \
"#{report_date.strftime('%Y-%m-%d')}-#{table}.jsonl"
end
end
end
2 changes: 1 addition & 1 deletion app/workers/feeds/feed_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def perform(feed_name, date)
time_since = TimeSince.new

feed = "Feeds::#{feed_name.titleize}".constantize.new(date.beginning_of_day, date.end_of_day).call
CloudData::ReportsFeed.new.write(feed, "#{feed_name.pluralize}.jsonl", date)
CloudData::ReportsFeed.new.write(feed, feed_name.pluralize, date)

Sidekiq.logger.info("Generated #{feed_name} feed in #{time_since.get} seconds")
end
Expand Down
16 changes: 11 additions & 5 deletions app/workers/gps_report_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def perform

private

DATABASE_NAME = 'gps_report'
SUPPLIERS = %w[geoamey serco].freeze

def post_results(results)
Expand Down Expand Up @@ -55,16 +56,21 @@ def failure_csv_content(failures)
def write_s3(content, supplier_name)
return if content.blank?

folder_name = @date_range.first.strftime('%Y/%m/%d')
filename = "#{@date_range.first.strftime('%Y-%m-%d')}-#{@date_range.last.strftime('%Y-%m-%d')}"

full_name = "#{folder_name}/#{filename}-#{supplier_name}.csv"
obj = bucket.object(full_name)
obj = bucket.object(full_path(supplier_name))
obj.put(body: content)

obj
end

def full_path(supplier_name)
"#{ENV.fetch('S3_REPORTING_PROJECT_PATH')}/" \
'data/' \
"database_name=#{DATABASE_NAME}/" \
"table_name=gps_reports_#{supplier_name}/" \
"extraction_timestamp=#{@date_range.first.strftime('%Y%m%d%H%M%SZ')}/" \
"#{@date_range.first.strftime('%Y-%m-%d')}-#{@date_range.last.strftime('%Y-%m-%d')}-gps-report.csv"
end

def bucket
return @bucket if @bucket.present?

Expand Down
1 change: 1 addition & 0 deletions helm_deploy/hmpps-book-secure-move-api/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ generic-service:
PROMETHEUS_METRICS: "on"
RAILS_LOG_TO_STDOUT: "true"
RAILS_MAX_THREADS: "5"
S3_REPORTING_PROJECT_PATH: landing/hmpps-book-secure-move-api
SERVE_API_DOCS: "true"
SIDEKIQ_PROCESSES: "3"

Expand Down
71 changes: 63 additions & 8 deletions spec/lib/cloud_data/reports_feed_spec.rb
Original file line number Diff line number Diff line change
@@ -1,30 +1,85 @@
require 'rails_helper'

RSpec.describe CloudData::ReportsFeed do
let(:s3_client) do
Aws::S3::Client.new(
access_key_id: s3_id,
secret_access_key: s3_key,
stub_responses: true,
)
end

let(:full_path) do
'landing/hmpps-book-secure-move-api/data/' \
'database_name=feeds_report/' \
'table_name=moves/' \
'extraction_timestamp=20200129000000Z/' \
'2020-01-29-moves.jsonl'
end

let(:table) { 'moves' }
let(:content) { 'some content' }
let(:bucket_name) { 'bucket_name' }
let(:s3_id) { 'my_id' }
let(:s3_key) { 'token123' }

before do
Aws.config.update(stub_responses: true) # rubocop:disable Rails/SaveBang
allow(Aws::S3::Client).to receive(:new).with(
access_key_id: s3_id,
secret_access_key: s3_key,
).and_return(s3_client)

Timecop.freeze(Time.zone.local(2020, 1, 30))
end

after do
Timecop.return
end

around do |example|
ClimateControl.modify(
'S3_REPORTING_ACCESS_KEY_ID' => s3_id,
'S3_REPORTING_SECRET_ACCESS_KEY' => s3_key,
'S3_REPORTING_PROJECT_PATH' => 'landing/hmpps-book-secure-move-api',
) do
example.run
end
end

it 'writes a file on S3 and return the full_name' do
full_name = described_class.new('bucket_name')
.write('some content', 'report.json')
full_name = described_class.new(bucket_name)
.write(content, table)

expect(full_name).to eq(full_path)
end

it 'calls the S3 SDK with the correct data' do
described_class.new(bucket_name).write(content, table)

expect(full_name).to eq('2020/01/29/2020-01-29-report.json')
expect(s3_client.api_requests.first.slice(:operation_name, :params)).to eq({
operation_name: :put_object,
params: {
body: content,
bucket: bucket_name,
key: full_path,
},
})
end

context 'when a report date is specified' do
let(:report_date) { Date.new(2020, 1, 15) }

it 'creates a file and names it base on the report date ' do
full_name = described_class.new('bucket_name')
.write('some content', 'report.json', report_date)
it 'creates a file and names it based on the report date' do
full_name = described_class.new(bucket_name)
.write(content, table, report_date)

expect(full_name).to eq('2020/01/15/2020-01-15-report.json')
expect(full_name).to eq(
'landing/hmpps-book-secure-move-api/data/' \
'database_name=feeds_report/' \
'table_name=moves/' \
'extraction_timestamp=20200115000000Z/' \
'2020-01-15-moves.jsonl',
)
end
end
end
2 changes: 1 addition & 1 deletion spec/workers/feeds/feed_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
described_class.new.perform(feed_name, Date.yesterday.to_s)

expect(feed_class).to have_received(:new).once.with(Date.yesterday.beginning_of_day, Date.yesterday.end_of_day)
expect(reports_feed_instance).to have_received(:write).once.with('feed_data', "#{feed_name.pluralize}.jsonl", Date.yesterday)
expect(reports_feed_instance).to have_received(:write).once.with('feed_data', feed_name.pluralize, Date.yesterday)
end
end
end
Expand Down
60 changes: 47 additions & 13 deletions spec/workers/gps_report_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,29 @@
serco: instance_double(GPSReport),
}
end

let(:slack_notifier) { instance_double(Slack::Notifier) }
let(:s3_object) { instance_double(Aws::S3::Object, presigned_url: 'aws_url') }
let(:s3_id) { 'my_id' }
let(:s3_key) { 'token123' }

let(:s3_client) do
Aws::S3::Client.new(
access_key_id: s3_id,
secret_access_key: s3_key,
stub_responses: true,
)
end

around do |example|
Timecop.freeze('2021-01-02 00:00:00')
example.run
ClimateControl.modify(
'S3_REPORTING_ACCESS_KEY_ID' => s3_id,
'S3_REPORTING_BUCKET_NAME' => 'moj-reg-dev',
'S3_REPORTING_SECRET_ACCESS_KEY' => s3_key,
'S3_REPORTING_PROJECT_PATH' => 'landing/hmpps-book-secure-move-api',
) do
example.run
end
Timecop.return
end

Expand All @@ -23,8 +40,10 @@
allow(GPSReport).to receive(:new).with(anything, 'serco').and_return(gps_reports[:serco])
allow(Slack::Notifier).to receive(:new).and_return(slack_notifier)
allow(slack_notifier).to receive(:post)
allow(s3_object).to receive(:put)
allow(Aws::S3::Resource).to receive(:new).and_return(instance_double(Aws::S3::Resource, bucket: instance_double(Aws::S3::Bucket, object: s3_object)))
allow(Aws::S3::Client).to receive(:new).with(
access_key_id: s3_id,
secret_access_key: s3_key,
).and_return(s3_client)
end

context 'when geoamey passes and serco fails' do
Expand Down Expand Up @@ -56,19 +75,19 @@
},
failure_files: {
geoamey: <<~STR,
reasons,no_journeys,no_gps_data
occurrences,1,2
reasons,no_gps_data
occurrences,1
move ids
,#{move.id},#{move.id}
,,#{move.id}
,#{move.id}
STR
serco: <<~STR,
reasons,no_gps_data
occurrences,1
reasons,no_journeys,no_gps_data
occurrences,1,2
move ids
,#{move.id}
,#{move.id},#{move.id}
,,#{move.id}
STR
},
}
Expand All @@ -83,13 +102,28 @@
failures: [{ move: move, reason: 'no_journeys' }, { move: move, reason: 'no_gps_data' }, { move: move, reason: 'no_gps_data' }],
move_count: 7,
})
allow_any_instance_of(Aws::S3::Object).to receive(:presigned_url).and_return('aws_url') # rubocop:disable RSpec/AnyInstance

gps_report_worker.perform
end

it 'uploads the failure files to s3' do
expect(s3_object).to have_received(:put).with(body: test_data[:failure_files][:geoamey])
expect(s3_object).to have_received(:put).with(body: test_data[:failure_files][:serco])
expect(s3_client.api_requests.first.slice(:operation_name, :params)).to eq({
operation_name: :put_object,
params: {
bucket: 'moj-reg-dev',
key: 'landing/hmpps-book-secure-move-api/data/database_name=gps_report/table_name=gps_reports_geoamey/extraction_timestamp=20201226000000Z/2020-12-26-2021-01-01-gps-report.csv',
body: test_data[:failure_files][:geoamey],
},
})
expect(s3_client.api_requests.second.slice(:operation_name, :params)).to eq(
operation_name: :put_object,
params: {
bucket: 'moj-reg-dev',
key: 'landing/hmpps-book-secure-move-api/data/database_name=gps_report/table_name=gps_reports_serco/extraction_timestamp=20201226000000Z/2020-12-26-2021-01-01-gps-report.csv',
body: test_data[:failure_files][:serco],
},
)
end

it 'posts the expected results message to slack' do
Expand Down

0 comments on commit d02ab3d

Please sign in to comment.