
LGA-2574 use new irsa service account #971

Merged · 22 commits · Aug 17, 2023

Commits
27ff3ea
Use new irsa service account
said-moj Aug 2, 2023
cbd0795
Remove AWS environment variables
said-moj Aug 2, 2023
c15095f
Add service account to collect static job
said-moj Aug 2, 2023
15d8f79
Add service account to main values.yaml file
said-moj Aug 3, 2023
b4e6a3b
Update reports to push to boto3 backed storage
said-moj Aug 4, 2023
1c92548
Remove dependency of old boto package
said-moj Aug 4, 2023
637fab7
Update celery to 4.0 and remove reference to old boto package from sq…
said-moj Aug 4, 2023
fdfdc31
Downgrading PyYAML to 5.3.1 due to 5.4 failing to build for Cython<3
said-moj Aug 7, 2023
60301d9
Celery no longer auto registers class based tasks. Manually register …
said-moj Aug 7, 2023
468273b
Use predefined_queues transport option defined in new version of kombu
said-moj Aug 7, 2023
0b0d640
Set obiee task name
said-moj Aug 7, 2023
93887bd
Starting from version 4.0, Celery uses message protocol version 2 as …
said-moj Aug 7, 2023
7590550
Remove unused sqs keys as we are using short lived tokens
said-moj Aug 8, 2023
b672347
Adding service account to worker deployment
said-moj Aug 8, 2023
a74b713
Kombu versions above 4.1 require celery 4.2 otherwise you will get a …
said-moj Aug 8, 2023
60b6638
Install celery[sqs] so that dependencies like pycurl get installed
said-moj Aug 8, 2023
f6261c7
Adding steps to install libcurl4-gnutls-dev because it is required for…
said-moj Aug 8, 2023
0bd0938
Manually register reason for contacting export task
said-moj Aug 9, 2023
7e00724
Set name of default export task
said-moj Aug 9, 2023
6930fd9
Kombu versions above 4.1 require celery 4.2 otherwise you will get a …
said-moj Aug 8, 2023
29883e6
Clean s3 file name and remove trailing slash
said-moj Aug 10, 2023
1e2cfa9
Add comment for why we need to seek the download to the start
said-moj Aug 16, 2023
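
Several of these commits reconfigure Celery's SQS transport to work with the IRSA service account: with kombu's predefined_queues transport option the worker is pinned to known queue URLs, and credentials come from the pod's short-lived web-identity token rather than static AWS keys. A minimal sketch of that style of configuration (queue name, region, and account ID are illustrative, not taken from this PR):

from celery import Celery

app = Celery("cla_backend")

# With IRSA the AWS SDK picks up short-lived credentials from the pod's
# web-identity token, so no access keys are configured here.
app.conf.broker_url = "sqs://"
app.conf.broker_transport_options = {
    "region": "eu-west-2",  # illustrative region
    "predefined_queues": {
        # Pinning the queue URL avoids the ListQueues/GetQueueUrl calls
        # that would otherwise require broader IAM permissions.
        "celery": {
            "url": "https://sqs.eu-west-2.amazonaws.com/123456789012/celery",
        },
    },
}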
6 changes: 6 additions & 0 deletions .circleci/config.yml
@@ -97,6 +97,9 @@ jobs:
- run:
name: Setup Python environment
command: |
echo "Installing ibcurl4-gnutls-dev because it is required for pycurl"
sudo apt-get update
sudo apt-get install -y libcurl4-gnutls-dev
pip install virtualenv
virtualenv pip-compile-env
- restore_cache:
@@ -137,6 +140,9 @@ jobs:
- run:
name: Setup Python environment
command: |
echo "Installing ibcurl4-gnutls-dev because it is required for pycurl"
sudo apt-get update
sudo apt-get install -y libcurl4-gnutls-dev
sudo apt-get update && sudo apt-get install -y libpython2.7 firefox
pip install virtualenv
virtualenv env
2 changes: 1 addition & 1 deletion Dockerfile
@@ -15,7 +15,7 @@ RUN adduser -D app && \
# To install pip dependencies
RUN apk add --no-cache \
build-base \
curl \
curl curl-dev \
git \
libxml2-dev \
libxslt-dev \
@@ -3,7 +3,6 @@
from datetime import datetime, time
from itertools import groupby

import boto
from django.conf import settings
from django.core import serializers
from django.core.management.base import BaseCommand
@@ -12,7 +11,7 @@

from cla_eventlog.constants import LOG_LEVELS, LOG_TYPES
from cla_eventlog.models import Log
from reports.utils import get_s3_connection
from cla_backend.libs.aws.s3 import ReportsS3, ClientError

logger = logging.getLogger(__name__)

@@ -75,7 +74,7 @@ def remove_same_day_consecutive_outcome_codes(self):
dupes_to_remove = Log.objects.filter(id__in=same_day_consecutive_outcome_log_ids)
try:
self.write_queryset_to_s3(dupes_to_remove)
except boto.exception.S3ResponseError as e:
except ClientError as e:
logger.error(
"LGA-125: Could not get bucket {}: {}".format(settings.AWS_DELETED_OBJECTS_BUCKET_NAME, e)
)
@@ -86,20 +85,7 @@ def remove_same_day_consecutive_outcome_codes(self):
logger.info("LGA-125: No dupe logs to remove")

def write_queryset_to_s3(self, queryset):
bucket = self.get_or_create_s3_bucket()
key = bucket.new_key("deleted-log-objects-{}".format(now().isoformat()))
serialized_queryset = serializers.serialize("json", queryset)
key.set_contents_from_string(serialized_queryset)
# Restore with:
# for restored_log_object in serializers.deserialize('json', serialized_queryset):
# restored_log_object.save()

@staticmethod
def get_or_create_s3_bucket():
conn = get_s3_connection()
bucket_name = settings.AWS_DELETED_OBJECTS_BUCKET_NAME
try:
return conn.get_bucket(bucket_name)
except boto.exception.S3ResponseError:
conn.create_bucket(bucket_name, location=settings.AWS_S3_REGION_NAME)
return conn.get_bucket(bucket_name)
key = "deleted-log-objects-{}".format(now().isoformat())
serialized_queryset = serializers.serialize("json", queryset)
ReportsS3.save_data_to_bucket(bucket_name, key, serialized_queryset)
9 changes: 3 additions & 6 deletions cla_backend/apps/reports/models.py
@@ -5,7 +5,7 @@

from model_utils.models import TimeStampedModel
from reports.constants import EXPORT_STATUS
from reports.utils import get_s3_connection
from cla_backend.libs.aws.s3 import ReportsS3


class Export(TimeStampedModel):
@@ -24,12 +24,9 @@ def link(self):
def delete_export_file(sender, instance=None, **kwargs):
# check if there is a connection to aws, otherwise delete locally
if settings.AWS_REPORTS_STORAGE_BUCKET_NAME:
conn = get_s3_connection()
bucket = conn.lookup(settings.AWS_REPORTS_STORAGE_BUCKET_NAME)

try:
k = bucket.get_key(settings.EXPORT_DIR + os.path.basename(instance.path))
bucket.delete_key(k)
key = settings.EXPORT_DIR + os.path.basename(instance.path)
ReportsS3.delete_file(settings.AWS_REPORTS_STORAGE_BUCKET_NAME, key)
except (ValueError, AttributeError):
pass
else:
30 changes: 19 additions & 11 deletions cla_backend/apps/reports/tasks.py
@@ -18,12 +18,14 @@
from dateutil.relativedelta import relativedelta
from django.conf import settings

from .utils import OBIEEExporter, get_s3_connection
from .utils import OBIEEExporter
from cla_backend.libs.aws.s3 import ReportsS3
from .models import Export
from .constants import EXPORT_STATUS
from core.utils import remember_cwd
from checker.models import ReasonForContacting
from urlparse import urlparse
from cla_backend.celery import app

logger = logging.getLogger(__name__)

@@ -79,20 +81,14 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
self.export.save()

def send_to_s3(self):
conn = get_s3_connection()
try:
bucket = conn.get_bucket(settings.AWS_REPORTS_STORAGE_BUCKET_NAME)
except Exception as e:
logger.error(
"Reports bucket couldn't be fetched. Ensure s3 credentials set. You may need the S3_USE_SIGV4 env var"
)
raise e
k = bucket.new_key(settings.EXPORT_DIR + os.path.basename(self.filepath))
k.set_contents_from_filename(self.filepath)
key = settings.EXPORT_DIR + os.path.basename(self.filepath)
ReportsS3.save_file(settings.AWS_REPORTS_STORAGE_BUCKET_NAME, key, self.filepath)
shutil.rmtree(self.filepath, ignore_errors=True)


class ExportTask(ExportTaskBase):
name = "exporttask"

def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs):
self.user = User.objects.get(pk=user_id)
self._create_export()
@@ -118,6 +114,8 @@ def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs):


class OBIEEExportTask(ExportTaskBase):
name = "obieeexporttask"

def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs):
"""
Export a full dump of the db for OBIEE export and make it available
@@ -158,6 +156,8 @@ def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs):


class ReasonForContactingExportTask(ExportTaskBase):
name = "reasonforcontactingexport"

def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs):
"""
Export csv files for each of the referrers from reason for contacting
@@ -229,3 +229,11 @@ def generate_rfc_zip(self):
with ZipFile(self.filepath, "w") as refer_zip:
for csv_file in glob.glob("*.csv"):
refer_zip.write(csv_file)


# The Task base class no longer automatically registers tasks
# https://docs.celeryq.dev/en/v4.0.0/whatsnew-4.0.html#the-task-base-class-no-longer-automatically-register-tasks
# https://github.com/celery/celery/issues/5992
tasks = [ExportTask(), OBIEEExportTask(), ReasonForContactingExportTask()]
for task in tasks:
app.tasks.register(task)
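
Since each task class above now carries an explicit name attribute, other code can dispatch these tasks by that string once they are registered. A hypothetical dispatch sketch, with illustrative argument values (the real callers pass the requesting user's pk, a generated filename, the report form class name, and serialized POST data):

from cla_backend.celery import app

# "exporttask" matches ExportTask.name as registered above;
# the argument values here are purely illustrative.
app.send_task("exporttask", args=[1, "report-2023-08.csv", "SomeReportForm", "{}"])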
8 changes: 4 additions & 4 deletions cla_backend/apps/reports/tests/test_models.py
@@ -5,7 +5,7 @@


class DeleteExportFile(TestCase):
@patch("reports.models.get_s3_connection")
@patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection")
def test_delete_export_file_no_aws(self, mock_s3):
with patch("os.remove") as mock_remove:
settings.AWS_REPORTS_STORAGE_BUCKET_NAME = ""
@@ -16,11 +16,11 @@ def test_delete_export_file_no_aws(self, mock_s3):
assert mock_remove.called
assert not mock_s3.called

@patch("reports.models.get_s3_connection", return_value=MagicMock())
@patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection", return_value=MagicMock())
def test_delete_export_file_with_aws(self, mock_s3):
settings.AWS_REPORTS_STORAGE_BUCKET_NAME = "AWS_TEST"
sender = MagicMock()
instance = None
# delete_export_file(sender, instance=None, **kwargs)
instance = MagicMock()
instance.path = "/tmp/test.txt"
delete_export_file(sender, instance)
assert mock_s3.called
19 changes: 0 additions & 19 deletions cla_backend/apps/reports/tests/test_utils.py

This file was deleted.

4 changes: 2 additions & 2 deletions cla_backend/apps/reports/tests/test_views.py
@@ -6,7 +6,7 @@


class DownloadFileTestCase(TestCase):
@patch("reports.views.get_s3_connection")
@patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection")
def test_download_no_aws(self, mock_s3):
# mock pythons open()
with patch("__builtin__.open", mock_open(read_data="data")) as mock_file:
@@ -24,7 +24,7 @@ def test_download_no_aws(self, mock_s3):
# built in Open method is called in views.py
mock_file.assert_called_with(file_path, "r")

@patch("reports.views.get_s3_connection", return_value=MagicMock())
@patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection", return_value=MagicMock())
def test_download_with_aws(self, mock_s3):
mock_request = MagicMock()
# if file_name contains string "schedule"
5 changes: 0 additions & 5 deletions cla_backend/apps/reports/utils.py
@@ -4,7 +4,6 @@
import tempfile
from datetime import date, datetime, time, timedelta

import boto
import pyminizip
from django.core.exceptions import ImproperlyConfigured as DjangoImproperlyConfigured
from django.conf import settings
@@ -159,7 +158,3 @@ def close(self):
os.remove(self.full_path)
if os.path.exists(self.tmp_export_path):
shutil.rmtree(self.tmp_export_path)


def get_s3_connection():
return boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY, host=settings.AWS_S3_HOST)
22 changes: 11 additions & 11 deletions cla_backend/apps/reports/views.py
@@ -34,7 +34,7 @@

from reports.models import Export
from .tasks import ExportTask, OBIEEExportTask, ReasonForContactingExportTask
from reports.utils import get_s3_connection
from cla_backend.libs.aws.s3 import ReportsS3


def report_view(request, form_class, title, template="case_report", success_task=ExportTask, file_name=None):
@@ -51,13 +51,15 @@ def report_view(request, form_class, title, template="case_report", success_task=ExportTask, file_name=None):
success_task().delay(request.user.pk, filename, form_class.__name__, json.dumps(request.POST))
messages.info(request, u"Your export is being processed. It will show up in the downloads tab shortly.")

return render(request, tmpl, {'has_permission': admin_site_instance.has_permission(request), "title": title, "form": form})
return render(
request, tmpl, {"has_permission": admin_site_instance.has_permission(request), "title": title, "form": form}
)


def scheduled_report_view(request, title):
tmpl = "admin/reports/case_report.html"
admin_site_instance = AdminSite()
return render(request, tmpl, {"title": title, 'has_permission': admin_site_instance.has_permission(request)})
return render(request, tmpl, {"title": title, "has_permission": admin_site_instance.has_permission(request)})


def valid_submit(request, form):
@@ -201,18 +203,16 @@ def reasons_for_contacting(request):
def download_file(request, file_name="", *args, **kwargs):
# check if there is a connection to aws, otherwise download from local TEMP_DIR
if settings.AWS_REPORTS_STORAGE_BUCKET_NAME:
conn = get_s3_connection()
bucket = conn.lookup(settings.AWS_REPORTS_STORAGE_BUCKET_NAME)
k = bucket.get_key(settings.EXPORT_DIR + file_name)
bucket_name = settings.AWS_REPORTS_STORAGE_BUCKET_NAME
key = settings.EXPORT_DIR + file_name
obj = ReportsS3.download_file(bucket_name, key)

if k is None:
if obj is None:
raise Http404("Export does not exist")

k.open_read()
headers = dict(k.resp.getheaders())
response = HttpResponse(k)
response = HttpResponse(obj["body"])

for key, val in headers.items():
for key, val in obj["headers"].items():
response[key] = val
else:
# only do this locally if debugging
41 changes: 41 additions & 0 deletions cla_backend/libs/aws/s3.py
@@ -1,5 +1,46 @@
from tempfile import NamedTemporaryFile
from storages.backends.s3boto3 import S3Boto3Storage
from botocore.exceptions import ClientError


# Subclassing the botocore exception lets callers import ClientError from
# this module instead of depending on botocore directly.
class ClientError(ClientError):
pass


class StaticS3Storage(S3Boto3Storage):
default_acl = "public-read"


class ReportsS3:
@classmethod
def clean_name(cls, name):
return name.strip("/")

@classmethod
def get_s3_connection(cls, bucket_name):
return S3Boto3Storage(bucket=bucket_name)

@classmethod
def download_file(cls, bucket_name, key):
try:
obj = cls.get_s3_connection(bucket_name).bucket.Object(cls.clean_name(key))
data = NamedTemporaryFile()
obj.download_fileobj(data)
# This is required; otherwise any file reads will start at the end, which
# leads to an empty file being downloaded (zero bytes)
data.seek(0)
return {"headers": {"Content-Type": obj.content_type}, "body": data}
except ClientError:
return None

@classmethod
def save_file(cls, bucket_name, key, path):
cls.get_s3_connection(bucket_name).bucket.Object(cls.clean_name(key)).upload_file(path)

@classmethod
def delete_file(cls, bucket_name, key):
cls.get_s3_connection(bucket_name).delete(cls.clean_name(key))

@classmethod
def save_data_to_bucket(cls, bucket_name, key, content):
cls.get_s3_connection(bucket_name).bucket.Object(key).put(Body=content)
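
ReportsS3 above wraps django-storages' S3Boto3Storage so the rest of the code deals only in bucket names and keys. A quick usage sketch of the methods it exposes (bucket and key values are illustrative; the real callers take them from Django settings):

from cla_backend.libs.aws.s3 import ReportsS3

bucket = "my-reports-bucket"  # illustrative; real code uses settings.AWS_REPORTS_STORAGE_BUCKET_NAME

# Upload a local file under the given key.
ReportsS3.save_file(bucket, "exports/report.zip", "/tmp/report.zip")

# Download returns headers plus a file object, or None if the key is missing.
obj = ReportsS3.download_file(bucket, "exports/report.zip")
if obj is not None:
    data = obj["body"].read()

ReportsS3.delete_file(bucket, "exports/report.zip")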