From 27ff3eab3abe91623da3d6118ce18b8d51a82038 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Wed, 2 Aug 2023 13:35:41 +0100 Subject: [PATCH 01/22] Use new irsa service account --- helm_deploy/cla-backend/templates/_helpers.tpl | 2 +- helm_deploy/cla-backend/values-uat.yaml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/helm_deploy/cla-backend/templates/_helpers.tpl b/helm_deploy/cla-backend/templates/_helpers.tpl index 4ef2fbcaa..263d79ece 100644 --- a/helm_deploy/cla-backend/templates/_helpers.tpl +++ b/helm_deploy/cla-backend/templates/_helpers.tpl @@ -68,7 +68,7 @@ Create the name of the service account to use {{- if .Values.serviceAccount.create -}} {{ default (include "cla-backend.fullname" .) .Values.serviceAccount.name }} {{- else -}} - {{ default "default" .Values.serviceAccount.name }} + {{.Release.Namespace }}-{{.Values.serviceAccount.name }} {{- end -}} {{- end -}} diff --git a/helm_deploy/cla-backend/values-uat.yaml b/helm_deploy/cla-backend/values-uat.yaml index c3708f375..afa6874ec 100644 --- a/helm_deploy/cla-backend/values-uat.yaml +++ b/helm_deploy/cla-backend/values-uat.yaml @@ -18,6 +18,9 @@ ingress: whitelist_additional: - 52.210.85.116/32 +serviceAccount: + name: irsa-sevice-account + # Lists don't deep merge, so this list of envVars overrides anything defined in an earlier values file envVars: DEBUG: From cbd0795de464414f34f362d22de1d1253f1564e3 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:07:40 +0100 Subject: [PATCH 02/22] Remove AWS environment vaariables --- cla_backend/settings/base.py | 2 -- helm_deploy/cla-backend/values.yaml | 8 -------- 2 files changed, 10 deletions(-) diff --git a/cla_backend/settings/base.py b/cla_backend/settings/base.py index 384e47809..00fb3a2b4 100644 --- a/cla_backend/settings/base.py +++ b/cla_backend/settings/base.py @@ -113,8 +113,6 @@ def env_var_truthy_intention(name): STATICFILES_STORAGE = "cla_backend.libs.aws.s3.StaticS3Storage" AWS_S3_REGION_NAME = os.environ.get("AWS_S3_REGION_NAME", "eu-west-1") -AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID") -AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY") AWS_DEFAULT_ACL = None AWS_QUERYSTRING_AUTH = False diff --git a/helm_deploy/cla-backend/values.yaml b/helm_deploy/cla-backend/values.yaml index b5f6eebfd..c2a03e501 100644 --- a/helm_deploy/cla-backend/values.yaml +++ b/helm_deploy/cla-backend/values.yaml @@ -182,14 +182,6 @@ envVars: secret: name: s3 key: static_files_bucket_name - AWS_ACCESS_KEY_ID: - secret: - name: s3 - key: access_key_id - AWS_SECRET_ACCESS_KEY: - secret: - name: s3 - key: secret_access_key AWS_DELETED_OBJECTS_BUCKET_NAME: secret: name: s3 From c15095f80ec846c8d566746437b1b519eeda2127 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Wed, 2 Aug 2023 16:20:15 +0100 Subject: [PATCH 03/22] Add service account to collect static job --- helm_deploy/cla-backend/templates/collect-static-job.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/helm_deploy/cla-backend/templates/collect-static-job.yml b/helm_deploy/cla-backend/templates/collect-static-job.yml index 5ba7a229c..a1f4e1f6b 100644 --- a/helm_deploy/cla-backend/templates/collect-static-job.yml +++ b/helm_deploy/cla-backend/templates/collect-static-job.yml @@ -20,6 +20,7 @@ spec: helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" spec: restartPolicy: Never + serviceAccountName: {{ include "cla-backend.serviceAccountName" . }} containers: - name: collect-static image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" From 15d8f79894c00ceeed01d5bc1640becb8ca58c36 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Thu, 3 Aug 2023 10:03:48 +0100 Subject: [PATCH 04/22] Add service account to main values.yaml file --- helm_deploy/cla-backend/values-uat.yaml | 3 --- helm_deploy/cla-backend/values.yaml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/helm_deploy/cla-backend/values-uat.yaml b/helm_deploy/cla-backend/values-uat.yaml index afa6874ec..c3708f375 100644 --- a/helm_deploy/cla-backend/values-uat.yaml +++ b/helm_deploy/cla-backend/values-uat.yaml @@ -18,9 +18,6 @@ ingress: whitelist_additional: - 52.210.85.116/32 -serviceAccount: - name: irsa-sevice-account - # Lists don't deep merge, so this list of envVars overrides anything defined in an earlier values file envVars: DEBUG: diff --git a/helm_deploy/cla-backend/values.yaml b/helm_deploy/cla-backend/values.yaml index c2a03e501..04b979a5d 100644 --- a/helm_deploy/cla-backend/values.yaml +++ b/helm_deploy/cla-backend/values.yaml @@ -20,7 +20,7 @@ serviceAccount: annotations: {} # The name of the service account to use. # If not set and create is true, a name is generated using the fullname template - name: + name: irsa-sevice-account podSecurityContext: {} # fsGroup: 2000 From b4e6a3be98b358fdd121baaf1255c6d466276837 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:15:42 +0100 Subject: [PATCH 05/22] Update reports to push boto3 backed storage --- ...move_same_day_consecutive_outcome_codes.py | 21 +++----------- cla_backend/apps/reports/models.py | 9 ++---- cla_backend/apps/reports/tasks.py | 15 +++------- cla_backend/apps/reports/tests/test_models.py | 8 +++--- cla_backend/apps/reports/tests/test_utils.py | 19 ------------- cla_backend/apps/reports/tests/test_views.py | 4 +-- cla_backend/apps/reports/utils.py | 5 ---- cla_backend/apps/reports/views.py | 22 +++++++-------- cla_backend/libs/aws/s3.py | 28 +++++++++++++++++++ 9 files changed, 56 insertions(+), 75 deletions(-) delete mode 100644 cla_backend/apps/reports/tests/test_utils.py diff --git a/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py b/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py index 12b367612..838520bcc 100644 --- a/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py +++ b/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py @@ -12,7 +12,7 @@ from cla_eventlog.constants import LOG_LEVELS, LOG_TYPES from cla_eventlog.models import Log -from reports.utils import get_s3_connection +from cla_backend.libs.aws.s3 import ReportsS3 logger = logging.getLogger(__name__) @@ -86,20 +86,7 @@ def remove_same_day_consecutive_outcome_codes(self): logger.info("LGA-125: No dupe logs to remove") def write_queryset_to_s3(self, queryset): - bucket = self.get_or_create_s3_bucket() - key = bucket.new_key("deleted-log-objects-{}".format(now().isoformat())) - serialized_queryset = serializers.serialize("json", queryset) - key.set_contents_from_string(serialized_queryset) - # Restore with: - # for restored_log_object in serializers.deserialize('json', serialized_queryset): - # restored_log_object.save() - - @staticmethod - def get_or_create_s3_bucket(): - conn = get_s3_connection() bucket_name = settings.AWS_DELETED_OBJECTS_BUCKET_NAME - try: - return conn.get_bucket(bucket_name) - except boto.exception.S3ResponseError: - conn.create_bucket(bucket_name, location=settings.AWS_S3_REGION_NAME) - return conn.get_bucket(bucket_name) + key = "deleted-log-objects-{}".format(now().isoformat()) + serialized_queryset = serializers.serialize("json", queryset) + ReportsS3.save_data_to_bucket(bucket_name, key, serialized_queryset) diff --git a/cla_backend/apps/reports/models.py b/cla_backend/apps/reports/models.py index 37b87e6bf..d4052e8ae 100644 --- a/cla_backend/apps/reports/models.py +++ b/cla_backend/apps/reports/models.py @@ -5,7 +5,7 @@ from model_utils.models import TimeStampedModel from reports.constants import EXPORT_STATUS -from reports.utils import get_s3_connection +from cla_backend.libs.aws.s3 import ReportsS3 class Export(TimeStampedModel): @@ -24,12 +24,9 @@ def link(self): def delete_export_file(sender, instance=None, **kwargs): # check if there is a connection to aws, otherwise delete locally if settings.AWS_REPORTS_STORAGE_BUCKET_NAME: - conn = get_s3_connection() - bucket = conn.lookup(settings.AWS_REPORTS_STORAGE_BUCKET_NAME) - try: - k = bucket.get_key(settings.EXPORT_DIR + os.path.basename(instance.path)) - bucket.delete_key(k) + key = settings.EXPORT_DIR + os.path.basename(instance.path) + ReportsS3.delete_file(settings.AWS_REPORTS_STORAGE_BUCKET_NAME, key) except (ValueError, AttributeError): pass else: diff --git a/cla_backend/apps/reports/tasks.py b/cla_backend/apps/reports/tasks.py index 8f384a9fb..d7e1196f5 100644 --- a/cla_backend/apps/reports/tasks.py +++ b/cla_backend/apps/reports/tasks.py @@ -18,7 +18,8 @@ from dateutil.relativedelta import relativedelta from django.conf import settings -from .utils import OBIEEExporter, get_s3_connection +from .utils import OBIEEExporter +from cla_backend.libs.aws.s3 import ReportsS3 from .models import Export from .constants import EXPORT_STATUS from core.utils import remember_cwd @@ -79,16 +80,8 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): self.export.save() def send_to_s3(self): - conn = get_s3_connection() - try: - bucket = conn.get_bucket(settings.AWS_REPORTS_STORAGE_BUCKET_NAME) - except Exception as e: - logger.error( - "Reports bucket couldn't be fetched. Ensure s3 credentials set. You may need the S3_USE_SIGV4 env var" - ) - raise e - k = bucket.new_key(settings.EXPORT_DIR + os.path.basename(self.filepath)) - k.set_contents_from_filename(self.filepath) + key = settings.EXPORT_DIR + os.path.basename(self.filepath) + ReportsS3.save_file(settings.AWS_REPORTS_STORAGE_BUCKET_NAME, key, self.filepath) shutil.rmtree(self.filepath, ignore_errors=True) diff --git a/cla_backend/apps/reports/tests/test_models.py b/cla_backend/apps/reports/tests/test_models.py index 7c340a3f3..f47c9bd9e 100644 --- a/cla_backend/apps/reports/tests/test_models.py +++ b/cla_backend/apps/reports/tests/test_models.py @@ -5,7 +5,7 @@ class DeleteExportFile(TestCase): - @patch("reports.models.get_s3_connection") + @patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection") def test_delete_export_file_no_aws(self, mock_s3): with patch("os.remove") as mock_remove: settings.AWS_REPORTS_STORAGE_BUCKET_NAME = "" @@ -16,11 +16,11 @@ def test_delete_export_file_no_aws(self, mock_s3): assert mock_remove.called assert not mock_s3.called - @patch("reports.models.get_s3_connection", return_value=MagicMock()) + @patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection", return_value=MagicMock()) def test_delete_export_file_with_aws(self, mock_s3): settings.AWS_REPORTS_STORAGE_BUCKET_NAME = "AWS_TEST" sender = MagicMock() - instance = None - # delete_export_file(sender, instance=None, **kwargs) + instance = MagicMock() + instance.path = "/tmp/test.txt" delete_export_file(sender, instance) assert mock_s3.called diff --git a/cla_backend/apps/reports/tests/test_utils.py b/cla_backend/apps/reports/tests/test_utils.py deleted file mode 100644 index 3effc13f1..000000000 --- a/cla_backend/apps/reports/tests/test_utils.py +++ /dev/null @@ -1,19 +0,0 @@ -import mock -import os - -from boto.s3.connection import S3Connection -from django.test import TestCase, override_settings -from reports.utils import get_s3_connection - - -class UtilsTestCase(TestCase): - @override_settings( - AWS_ACCESS_KEY_ID="000000000001", - AWS_SECRET_ACCESS_KEY="000000000002", - AWS_S3_HOST="s3.eu-west-2.amazonaws.com", - ) - def test_get_s3_connection(self): - envs = {"S3_USE_SIGV4": "True"} - with mock.patch.dict(os.environ, envs): - conn = get_s3_connection() - self.assertIsInstance(conn, S3Connection) diff --git a/cla_backend/apps/reports/tests/test_views.py b/cla_backend/apps/reports/tests/test_views.py index a95262524..0e5d1bca7 100644 --- a/cla_backend/apps/reports/tests/test_views.py +++ b/cla_backend/apps/reports/tests/test_views.py @@ -6,7 +6,7 @@ class DownloadFileTestCase(TestCase): - @patch("reports.views.get_s3_connection") + @patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection") def test_download_no_aws(self, mock_s3): # mock pythons open() with patch("__builtin__.open", mock_open(read_data="data")) as mock_file: @@ -24,7 +24,7 @@ def test_download_no_aws(self, mock_s3): # built in Open method is called in views.py mock_file.assert_called_with(file_path, "r") - @patch("reports.views.get_s3_connection", return_value=MagicMock()) + @patch("cla_backend.libs.aws.s3.ReportsS3.get_s3_connection", return_value=MagicMock()) def test_download_with_aws(self, mock_s3): mock_request = MagicMock() # if file_name contains string "schedule" diff --git a/cla_backend/apps/reports/utils.py b/cla_backend/apps/reports/utils.py index b4b9009bc..b3d14df92 100644 --- a/cla_backend/apps/reports/utils.py +++ b/cla_backend/apps/reports/utils.py @@ -4,7 +4,6 @@ import tempfile from datetime import date, datetime, time, timedelta -import boto import pyminizip from django.core.exceptions import ImproperlyConfigured as DjangoImproperlyConfigured from django.conf import settings @@ -159,7 +158,3 @@ def close(self): os.remove(self.full_path) if os.path.exists(self.tmp_export_path): shutil.rmtree(self.tmp_export_path) - - -def get_s3_connection(): - return boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY, host=settings.AWS_S3_HOST) diff --git a/cla_backend/apps/reports/views.py b/cla_backend/apps/reports/views.py index 97d87322b..51c41cefb 100644 --- a/cla_backend/apps/reports/views.py +++ b/cla_backend/apps/reports/views.py @@ -34,7 +34,7 @@ from reports.models import Export from .tasks import ExportTask, OBIEEExportTask, ReasonForContactingExportTask -from reports.utils import get_s3_connection +from cla_backend.libs.aws.s3 import ReportsS3 def report_view(request, form_class, title, template="case_report", success_task=ExportTask, file_name=None): @@ -51,13 +51,15 @@ def report_view(request, form_class, title, template="case_report", success_task success_task().delay(request.user.pk, filename, form_class.__name__, json.dumps(request.POST)) messages.info(request, u"Your export is being processed. It will show up in the downloads tab shortly.") - return render(request, tmpl, {'has_permission': admin_site_instance.has_permission(request), "title": title, "form": form}) + return render( + request, tmpl, {"has_permission": admin_site_instance.has_permission(request), "title": title, "form": form} + ) def scheduled_report_view(request, title): tmpl = "admin/reports/case_report.html" admin_site_instance = AdminSite() - return render(request, tmpl, {"title": title, 'has_permission': admin_site_instance.has_permission(request)}) + return render(request, tmpl, {"title": title, "has_permission": admin_site_instance.has_permission(request)}) def valid_submit(request, form): @@ -201,18 +203,16 @@ def reasons_for_contacting(request): def download_file(request, file_name="", *args, **kwargs): # check if there is a connection to aws, otherwise download from local TEMP_DIR if settings.AWS_REPORTS_STORAGE_BUCKET_NAME: - conn = get_s3_connection() - bucket = conn.lookup(settings.AWS_REPORTS_STORAGE_BUCKET_NAME) - k = bucket.get_key(settings.EXPORT_DIR + file_name) + bucket_name = settings.AWS_REPORTS_STORAGE_BUCKET_NAME + key = settings.EXPORT_DIR + file_name + obj = ReportsS3.download_file(bucket_name, key) - if k is None: + if obj is None: raise Http404("Export does not exist") - k.open_read() - headers = dict(k.resp.getheaders()) - response = HttpResponse(k) + response = HttpResponse(obj["body"]) - for key, val in headers.items(): + for key, val in obj["headers"].items(): response[key] = val else: # only do this locally if debugging diff --git a/cla_backend/libs/aws/s3.py b/cla_backend/libs/aws/s3.py index 362c4a0eb..49b9a856f 100644 --- a/cla_backend/libs/aws/s3.py +++ b/cla_backend/libs/aws/s3.py @@ -1,5 +1,33 @@ from storages.backends.s3boto3 import S3Boto3Storage +from botocore.exceptions import ClientError class StaticS3Storage(S3Boto3Storage): default_acl = "public-read" + + +class ReportsS3: + @classmethod + def get_s3_connection(cls, bucket_name): + return StaticS3Storage(bucket=bucket_name) + + @classmethod + def download_file(cls, bucket_name, key): + try: + response = cls.get_s3_connection(bucket_name).bucket.Object(key).get() + except ClientError: + return None + + return {"headers": {"Content-Type": response["ContentType"]}, "body": response["Body"]} + + @classmethod + def save_file(cls, bucket_name, key, path): + cls.get_s3_connection(bucket_name).save(key, open(path, "rb")) + + @classmethod + def delete_file(cls, bucket_name, key): + cls.get_s3_connection(bucket_name).delete(key) + + @classmethod + def save_data_to_bucket(cls, bucket_name, key, content): + cls.get_s3_connection(bucket_name).bucket.Object(key).put(Body=content) From 1c925486cc2d4a3c0e5871e02691bd690f0ecc5a Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:30:46 +0100 Subject: [PATCH 06/22] Remove dependency of old boto package --- .../commands/remove_same_day_consecutive_outcome_codes.py | 5 ++--- cla_backend/libs/aws/s3.py | 4 ++++ requirements/generated/requirements-dev.txt | 2 -- requirements/generated/requirements-docs.txt | 2 -- requirements/generated/requirements-production.txt | 2 -- requirements/generated/requirements-testing.txt | 2 -- requirements/source/requirements-base.in | 1 - 7 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py b/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py index 838520bcc..b47e80b9d 100644 --- a/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py +++ b/cla_backend/apps/cla_eventlog/management/commands/remove_same_day_consecutive_outcome_codes.py @@ -3,7 +3,6 @@ from datetime import datetime, time from itertools import groupby -import boto from django.conf import settings from django.core import serializers from django.core.management.base import BaseCommand @@ -12,7 +11,7 @@ from cla_eventlog.constants import LOG_LEVELS, LOG_TYPES from cla_eventlog.models import Log -from cla_backend.libs.aws.s3 import ReportsS3 +from cla_backend.libs.aws.s3 import ReportsS3, ClientError logger = logging.getLogger(__name__) @@ -75,7 +74,7 @@ def remove_same_day_consecutive_outcome_codes(self): dupes_to_remove = Log.objects.filter(id__in=same_day_consecutive_outcome_log_ids) try: self.write_queryset_to_s3(dupes_to_remove) - except boto.exception.S3ResponseError as e: + except ClientError as e: logger.error( "LGA-125: Could not get bucket {}: {}".format(settings.AWS_DELETED_OBJECTS_BUCKET_NAME, e) ) diff --git a/cla_backend/libs/aws/s3.py b/cla_backend/libs/aws/s3.py index 49b9a856f..92d01d7ef 100644 --- a/cla_backend/libs/aws/s3.py +++ b/cla_backend/libs/aws/s3.py @@ -2,6 +2,10 @@ from botocore.exceptions import ClientError +class ClientError(ClientError): + pass + + class StaticS3Storage(S3Boto3Storage): default_acl = "public-read" diff --git a/requirements/generated/requirements-dev.txt b/requirements/generated/requirements-dev.txt index db6d16c52..8e9fc31e5 100644 --- a/requirements/generated/requirements-dev.txt +++ b/requirements/generated/requirements-dev.txt @@ -16,8 +16,6 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in -boto==2.39.0 - # via -r requirements/source/requirements-base.in botocore==1.20.112 # via # boto3 diff --git a/requirements/generated/requirements-docs.txt b/requirements/generated/requirements-docs.txt index 445b0acb1..96cc0742c 100644 --- a/requirements/generated/requirements-docs.txt +++ b/requirements/generated/requirements-docs.txt @@ -18,8 +18,6 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in -boto==2.39.0 - # via -r requirements/source/requirements-base.in botocore==1.20.112 # via # boto3 diff --git a/requirements/generated/requirements-production.txt b/requirements/generated/requirements-production.txt index 69230a7de..9c1f7cbe3 100644 --- a/requirements/generated/requirements-production.txt +++ b/requirements/generated/requirements-production.txt @@ -14,8 +14,6 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in -boto==2.39.0 - # via -r requirements/source/requirements-base.in botocore==1.20.112 # via # boto3 diff --git a/requirements/generated/requirements-testing.txt b/requirements/generated/requirements-testing.txt index 58b3bedfb..656a922aa 100644 --- a/requirements/generated/requirements-testing.txt +++ b/requirements/generated/requirements-testing.txt @@ -14,8 +14,6 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in -boto==2.39.0 - # via -r requirements/source/requirements-base.in botocore==1.20.112 # via # boto3 diff --git a/requirements/source/requirements-base.in b/requirements/source/requirements-base.in index 2e8c4850c..60f7a98e7 100644 --- a/requirements/source/requirements-base.in +++ b/requirements/source/requirements-base.in @@ -41,7 +41,6 @@ polib==1.0.6 # Fork PgFulltext - PR added https://github.com/ministryofjustice/djorm-ext-pgfulltext/archive/refs/tags/0.1.0.tar.gz celery==3.1.18 -boto==2.39.0 PyYAML==5.4 pyminizip==0.2.3 From 637fab7676cfc3486749f65e06b256c96ed105ef Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Fri, 4 Aug 2023 16:54:50 +0100 Subject: [PATCH 07/22] Update celery to 4.0 and remove reference to old boto package from sqs class --- cla_backend/settings/base.py | 25 +++----------- cla_backend/sqs.py | 34 ++----------------- requirements/generated/requirements-dev.txt | 13 +++---- requirements/generated/requirements-docs.txt | 16 +++++---- .../generated/requirements-production.txt | 16 +++++---- .../generated/requirements-testing.txt | 16 +++++---- requirements/source/requirements-base.in | 3 +- 7 files changed, 43 insertions(+), 80 deletions(-) diff --git a/cla_backend/settings/base.py b/cla_backend/settings/base.py index 00fb3a2b4..995b8da00 100644 --- a/cla_backend/settings/base.py +++ b/cla_backend/settings/base.py @@ -3,13 +3,11 @@ import os import sentry_sdk -from boto.s3.connection import NoHostProvided from cla_common.call_centre_availability import OpeningHours from cla_common.services import CacheAdapter from collections import defaultdict from kombu import transport from sentry_sdk.integrations.django import DjangoIntegration -from django.conf.global_settings import TEMPLATE_CONTEXT_PROCESSORS from cla_backend.sqs import CLASQSChannel @@ -116,10 +114,6 @@ def env_var_truthy_intention(name): AWS_DEFAULT_ACL = None AWS_QUERYSTRING_AUTH = False -# Annoyingly the host parameter boto.s3.connection.S3Connection needs to be host string if it's not the default -# value of boto.s3.connection.NoHostProvided class reference and not None -AWS_S3_HOST = os.environ.get("AWS_S3_HOST", NoHostProvided) - # This bucket needs to a private bucket as it will contain sensitive reports AWS_REPORTS_STORAGE_BUCKET_NAME = os.environ.get("AWS_REPORTS_STORAGE_BUCKET_NAME") # This bucket needs to a public bucket as it will serve public assets such as css,images and js @@ -400,24 +394,11 @@ def traces_sampler(sampling_context): OBIEE_EMAIL_TO = os.environ.get("OBIEE_EMAIL_TO", DEFAULT_EMAIL_TO) OBIEE_ZIP_PASSWORD = os.environ.get("OBIEE_ZIP_PASSWORD") -# celery -if all([env_var_truthy_intention("SQS_ACCESS_KEY"), env_var_truthy_intention("SQS_SECRET_KEY")]): - import urllib - - BROKER_URL = "sqs://{access_key}:{secret_key}@".format( - access_key=urllib.quote(os.environ.get("SQS_ACCESS_KEY"), safe=""), - secret_key=urllib.quote(os.environ.get("SQS_SECRET_KEY"), safe=""), - ) -else: - # if no BROKER_URL specified then don't try to use celery - # because it'll just cause errors - CELERY_ALWAYS_EAGER = True - CLA_ENV = os.environ.get("CLA_ENV", "local") BROKER_TRANSPORT_OPTIONS = { "polling_interval": 10, - "region": os.environ.get("SQS_REGION", "eu-west-1"), + "region": os.environ.get("SQS_REGION", "eu-west-2"), "wait_time_seconds": 20, } @@ -426,11 +407,15 @@ def traces_sampler(sampling_context): # This is to stop actions such as ListQueues being triggered # which we do not have on the cloud platform environments transport.SQS.Transport.Channel = CLASQSChannel + BROKER_URL = "sqs://" predefined_queue_url = os.environ.get("CELERY_PREDEFINED_QUEUE_URL") CELERY_DEFAULT_QUEUE = predefined_queue_url.split("/")[-1] BROKER_TRANSPORT_OPTIONS["predefined_queue_url"] = predefined_queue_url else: + # if no BROKER_URL specified then don't try to use celery + # because it'll just cause errors + CELERY_ALWAYS_EAGER = True BROKER_TRANSPORT_OPTIONS["queue_name_prefix"] = "env-%(env)s-" % {"env": CLA_ENV} CELERY_ACCEPT_CONTENT = ["yaml"] # because json serializer doesn't support dates diff --git a/cla_backend/sqs.py b/cla_backend/sqs.py index 0970f81e4..6bf67ce73 100644 --- a/cla_backend/sqs.py +++ b/cla_backend/sqs.py @@ -1,39 +1,11 @@ -import collections - -from boto.sqs.queue import Queue -from boto.sqs.message import Message - from kombu.transport.SQS import Channel -from kombu.transport import virtual -from kombu.utils import cached_property class CLASQSChannel(Channel): - @cached_property - def predefined_queues(self): - # We are using a strict sqs setup which we are given only list of predefined queues and + def _update_queue_cache(self, queue_name_prefix): url = self.transport_options.get("predefined_queue_url", None) - q = Queue(connection=self.sqs, url=url, message_class=Message) - return [q] - - def __init__(self, *args, **kwargs): - # CLA Change - On cloud platforms we don't have permissions to perform actions such as ListQueues - # So instead lets use a list of predefined queue names - # Remove call to direct parent as that will perform the ListQueues action - # super(CLASQSChannel, self).__init__(*args, **kwargs) - virtual.Channel.__init__(self, *args, **kwargs) - - queues = self.predefined_queues # self.sqs.get_all_queues(prefix=self.queue_name_prefix) - - for queue in queues: - self._queue_cache[queue.name] = queue - self._fanout_queues = set() - - # The drain_events() method stores extra messages in a local - # Deque object. This allows multiple messages to be requested from - # SQS at once for performance, but maintains the same external API - # to the caller of the drain_events() method. - self._queue_message_cache = collections.deque() + queue_name = url.split("/")[-1] + self._queue_cache[queue_name] = url def _new_queue(self, queue, **kwargs): # Translate to SQS name for consistency with initial diff --git a/requirements/generated/requirements-dev.txt b/requirements/generated/requirements-dev.txt index 8e9fc31e5..dd559de3e 100644 --- a/requirements/generated/requirements-dev.txt +++ b/requirements/generated/requirements-dev.txt @@ -4,13 +4,11 @@ # # pip-compile --output-file=requirements/generated/requirements-dev.txt requirements/source/requirements-dev.in # -amqp==1.4.9 - # via kombu -anyjson==0.3.3 +amqp==2.6.1 # via kombu aspy.yaml==1.3.0 # via pre-commit -billiard==3.3.0.23 +billiard==3.5.0.5 # via celery bleach==3.3.0 # via -r requirements/source/requirements-base.in @@ -20,7 +18,7 @@ botocore==1.20.112 # via # boto3 # s3transfer -celery==3.1.18 +celery==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -154,6 +152,7 @@ idna==2.10 # urllib3 importlib-metadata==2.1.3 # via + # kombu # pre-commit # sqlalchemy # virtualenv @@ -177,7 +176,7 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==3.0.37 +kombu==4.6.11 # via celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in @@ -308,6 +307,8 @@ urllib3[secure]==1.26.14 # sentry-sdk uwsgi==2.0.18 # via -r requirements/source/requirements-base.in +vine==1.3.0 + # via amqp virtualenv==20.15.1 # via pre-commit webdriver-manager==1.6.2 diff --git a/requirements/generated/requirements-docs.txt b/requirements/generated/requirements-docs.txt index 96cc0742c..73a1ad5ab 100644 --- a/requirements/generated/requirements-docs.txt +++ b/requirements/generated/requirements-docs.txt @@ -6,13 +6,11 @@ # alabaster==0.7.12 # via sphinx -amqp==1.4.9 - # via kombu -anyjson==0.3.3 +amqp==2.6.1 # via kombu babel==2.9.1 # via sphinx -billiard==3.3.0.23 +billiard==3.5.0.5 # via celery bleach==3.3.0 # via -r requirements/source/requirements-base.in @@ -22,7 +20,7 @@ botocore==1.20.112 # via # boto3 # s3transfer -celery==3.1.18 +celery==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -116,7 +114,9 @@ idna==2.10 imagesize==1.4.1 # via sphinx importlib-metadata==2.1.3 - # via sqlalchemy + # via + # kombu + # sqlalchemy jdcal==1.4.1 # via openpyxl jinja2==2.11.3 @@ -131,7 +131,7 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==3.0.37 +kombu==4.6.11 # via celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in @@ -230,6 +230,8 @@ urllib3==1.26.14 # sentry-sdk uwsgi==2.0.18 # via -r requirements/source/requirements-base.in +vine==1.3.0 + # via amqp webencodings==0.5.1 # via bleach xlrd==2.0.1 diff --git a/requirements/generated/requirements-production.txt b/requirements/generated/requirements-production.txt index 9c1f7cbe3..e230094af 100644 --- a/requirements/generated/requirements-production.txt +++ b/requirements/generated/requirements-production.txt @@ -4,11 +4,9 @@ # # pip-compile --output-file=requirements/generated/requirements-production.txt requirements/source/requirements-production.in # -amqp==1.4.9 +amqp==2.6.1 # via kombu -anyjson==0.3.3 - # via kombu -billiard==3.3.0.23 +billiard==3.5.0.5 # via celery bleach==3.3.0 # via -r requirements/source/requirements-base.in @@ -18,7 +16,7 @@ botocore==1.20.112 # via # boto3 # s3transfer -celery==3.1.18 +celery==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -104,7 +102,9 @@ futures==3.4.0 idna==2.10 # via requests importlib-metadata==2.1.3 - # via sqlalchemy + # via + # kombu + # sqlalchemy jdcal==1.4.1 # via openpyxl jmespath==0.10.0 @@ -117,7 +117,7 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==3.0.37 +kombu==4.6.11 # via celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in @@ -197,6 +197,8 @@ urllib3==1.26.14 # sentry-sdk uwsgi==2.0.18 # via -r requirements/source/requirements-base.in +vine==1.3.0 + # via amqp webencodings==0.5.1 # via bleach xlrd==2.0.1 diff --git a/requirements/generated/requirements-testing.txt b/requirements/generated/requirements-testing.txt index 656a922aa..59c51e168 100644 --- a/requirements/generated/requirements-testing.txt +++ b/requirements/generated/requirements-testing.txt @@ -4,11 +4,9 @@ # # pip-compile --output-file=requirements/generated/requirements-testing.txt requirements/source/requirements-testing.in # -amqp==1.4.9 +amqp==2.6.1 # via kombu -anyjson==0.3.3 - # via kombu -billiard==3.3.0.23 +billiard==3.5.0.5 # via celery bleach==3.3.0 # via -r requirements/source/requirements-base.in @@ -18,7 +16,7 @@ botocore==1.20.112 # via # boto3 # s3transfer -celery==3.1.18 +celery==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -133,7 +131,9 @@ idna==2.10 # requests # urllib3 importlib-metadata==2.1.3 - # via sqlalchemy + # via + # kombu + # sqlalchemy ipaddress==1.0.23 # via # cryptography @@ -150,7 +150,7 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==3.0.37 +kombu==4.6.11 # via celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in @@ -254,6 +254,8 @@ urllib3[secure]==1.26.14 # sentry-sdk uwsgi==2.0.18 # via -r requirements/source/requirements-base.in +vine==1.3.0 + # via amqp webdriver-manager==1.6.2 # via -r requirements/source/requirements-testing.in webencodings==0.5.1 diff --git a/requirements/source/requirements-base.in b/requirements/source/requirements-base.in index 60f7a98e7..4611b5ebc 100644 --- a/requirements/source/requirements-base.in +++ b/requirements/source/requirements-base.in @@ -40,7 +40,7 @@ polib==1.0.6 # Fork PgFulltext - PR added https://github.com/ministryofjustice/djorm-ext-pgfulltext/archive/refs/tags/0.1.0.tar.gz -celery==3.1.18 +celery==4.0.2 PyYAML==5.4 pyminizip==0.2.3 @@ -59,4 +59,3 @@ django-session-security==2.2.3 # Govuk Notify Service notifications-python-client==6.0.0 - From fdfdc319a47009075dff41edc4ef087c82554038 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Mon, 7 Aug 2023 09:42:45 +0100 Subject: [PATCH 08/22] Downgrading PyYAML to 5.3.1 due to 5.4 failing to build for Cython<3 Solution taken from https://github.com/yaml/pyyaml/issues/724 Main issue https://github.com/yaml/pyyaml/issues/728 Recommended workaround https://github.com/yaml/pyyaml/issues/736 --- requirements/generated/requirements-dev.txt | 2 +- requirements/generated/requirements-docs.txt | 2 +- requirements/generated/requirements-production.txt | 2 +- requirements/generated/requirements-testing.txt | 2 +- requirements/source/requirements-base.in | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements/generated/requirements-dev.txt b/requirements/generated/requirements-dev.txt index dd559de3e..bb8d168d6 100644 --- a/requirements/generated/requirements-dev.txt +++ b/requirements/generated/requirements-dev.txt @@ -239,7 +239,7 @@ pytz==2021.1 # -r requirements/source/requirements-base.in # celery # cla-common -pyyaml==5.4 +pyyaml==5.3.1 # via # -r requirements/source/requirements-base.in # aspy.yaml diff --git a/requirements/generated/requirements-docs.txt b/requirements/generated/requirements-docs.txt index 73a1ad5ab..dca6b63ef 100644 --- a/requirements/generated/requirements-docs.txt +++ b/requirements/generated/requirements-docs.txt @@ -181,7 +181,7 @@ pytz==2021.1 # babel # celery # cla-common -pyyaml==5.4 +pyyaml==5.3.1 # via -r requirements/source/requirements-base.in requests==2.27.1 # via diff --git a/requirements/generated/requirements-production.txt b/requirements/generated/requirements-production.txt index e230094af..baddc9078 100644 --- a/requirements/generated/requirements-production.txt +++ b/requirements/generated/requirements-production.txt @@ -160,7 +160,7 @@ pytz==2021.1 # -r requirements/source/requirements-base.in # celery # cla-common -pyyaml==5.4 +pyyaml==5.3.1 # via -r requirements/source/requirements-base.in requests==2.27.1 # via diff --git a/requirements/generated/requirements-testing.txt b/requirements/generated/requirements-testing.txt index 59c51e168..0a49021eb 100644 --- a/requirements/generated/requirements-testing.txt +++ b/requirements/generated/requirements-testing.txt @@ -202,7 +202,7 @@ pytz==2021.1 # -r requirements/source/requirements-base.in # celery # cla-common -pyyaml==5.4 +pyyaml==5.3.1 # via -r requirements/source/requirements-base.in requests==2.27.1 # via diff --git a/requirements/source/requirements-base.in b/requirements/source/requirements-base.in index 4611b5ebc..f1534ded6 100644 --- a/requirements/source/requirements-base.in +++ b/requirements/source/requirements-base.in @@ -41,7 +41,7 @@ polib==1.0.6 # Fork PgFulltext - PR added https://github.com/ministryofjustice/djorm-ext-pgfulltext/archive/refs/tags/0.1.0.tar.gz celery==4.0.2 -PyYAML==5.4 +PyYAML==5.3.1 pyminizip==0.2.3 #Irat healthcheck and ping package From 60301d979fd9649b080fed5f9b1ac7aa33fc1aa2 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Mon, 7 Aug 2023 12:14:14 +0100 Subject: [PATCH 09/22] Celery no longer auto registers class based tasks. Manually register class based celery tasks --- cla_backend/apps/reports/tasks.py | 4 ++++ cla_backend/libs/aws/s3.py | 10 ++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cla_backend/apps/reports/tasks.py b/cla_backend/apps/reports/tasks.py index d7e1196f5..00322dcf3 100644 --- a/cla_backend/apps/reports/tasks.py +++ b/cla_backend/apps/reports/tasks.py @@ -25,6 +25,7 @@ from core.utils import remember_cwd from checker.models import ReasonForContacting from urlparse import urlparse +from cla_backend.celery import app logger = logging.getLogger(__name__) @@ -150,6 +151,9 @@ def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): pass +app.tasks.register(OBIEEExportTask()) + + class ReasonForContactingExportTask(ExportTaskBase): def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): """ diff --git a/cla_backend/libs/aws/s3.py b/cla_backend/libs/aws/s3.py index 92d01d7ef..1f3a4c54e 100644 --- a/cla_backend/libs/aws/s3.py +++ b/cla_backend/libs/aws/s3.py @@ -1,3 +1,4 @@ +from tempfile import NamedTemporaryFile from storages.backends.s3boto3 import S3Boto3Storage from botocore.exceptions import ClientError @@ -13,17 +14,18 @@ class StaticS3Storage(S3Boto3Storage): class ReportsS3: @classmethod def get_s3_connection(cls, bucket_name): - return StaticS3Storage(bucket=bucket_name) + return S3Boto3Storage(bucket=bucket_name) @classmethod def download_file(cls, bucket_name, key): try: - response = cls.get_s3_connection(bucket_name).bucket.Object(key).get() + obj = cls.get_s3_connection(bucket_name).bucket.Object(key.strip("/")) + data = NamedTemporaryFile() + obj.download_fileobj(data) + return {"headers": {"Content-Type": obj.content_type}, "body": data} except ClientError: return None - return {"headers": {"Content-Type": response["ContentType"]}, "body": response["Body"]} - @classmethod def save_file(cls, bucket_name, key, path): cls.get_s3_connection(bucket_name).save(key, open(path, "rb")) From 468273bc296cac9223277ab8f4691cf23d4d37e4 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Mon, 7 Aug 2023 15:56:14 +0100 Subject: [PATCH 10/22] Use predefined_queues transport option defined in new version of kombu --- cla_backend/settings/base.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/cla_backend/settings/base.py b/cla_backend/settings/base.py index 995b8da00..c761ef937 100644 --- a/cla_backend/settings/base.py +++ b/cla_backend/settings/base.py @@ -6,9 +6,7 @@ from cla_common.call_centre_availability import OpeningHours from cla_common.services import CacheAdapter from collections import defaultdict -from kombu import transport from sentry_sdk.integrations.django import DjangoIntegration -from cla_backend.sqs import CLASQSChannel def env_var_truthy_intention(name): @@ -403,15 +401,9 @@ def traces_sampler(sampling_context): } if os.environ.get("CELERY_PREDEFINED_QUEUE_URL"): - # Monkey patch the SQS transport channel to use our channel - # This is to stop actions such as ListQueues being triggered - # which we do not have on the cloud platform environments - transport.SQS.Transport.Channel = CLASQSChannel - BROKER_URL = "sqs://" - predefined_queue_url = os.environ.get("CELERY_PREDEFINED_QUEUE_URL") CELERY_DEFAULT_QUEUE = predefined_queue_url.split("/")[-1] - BROKER_TRANSPORT_OPTIONS["predefined_queue_url"] = predefined_queue_url + BROKER_TRANSPORT_OPTIONS["predefined_queues"] = {CELERY_DEFAULT_QUEUE: {"url": predefined_queue_url}} else: # if no BROKER_URL specified then don't try to use celery # because it'll just cause errors From 0b0d640f6fcdf9833a6ad3dfd438c38583a62e71 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Mon, 7 Aug 2023 16:35:46 +0100 Subject: [PATCH 11/22] Set obiee task name --- cla_backend/apps/reports/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cla_backend/apps/reports/tasks.py b/cla_backend/apps/reports/tasks.py index 00322dcf3..25b17fe1e 100644 --- a/cla_backend/apps/reports/tasks.py +++ b/cla_backend/apps/reports/tasks.py @@ -112,6 +112,8 @@ def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): class OBIEEExportTask(ExportTaskBase): + name = "obieeexporttask" + def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): """ Export a full dump of the db for OBIEE export and make it available From 93887bd9faf6c96c8f08fb1595143d62195e8bbf Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Mon, 7 Aug 2023 16:50:51 +0100 Subject: [PATCH 12/22] Starting from version 4.0, Celery uses message protocol version 2 as default value. Explicitly using old protocol --- cla_backend/settings/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cla_backend/settings/base.py b/cla_backend/settings/base.py index c761ef937..26e05abb7 100644 --- a/cla_backend/settings/base.py +++ b/cla_backend/settings/base.py @@ -401,6 +401,7 @@ def traces_sampler(sampling_context): } if os.environ.get("CELERY_PREDEFINED_QUEUE_URL"): + BROKER_URL = "sqs://" predefined_queue_url = os.environ.get("CELERY_PREDEFINED_QUEUE_URL") CELERY_DEFAULT_QUEUE = predefined_queue_url.split("/")[-1] BROKER_TRANSPORT_OPTIONS["predefined_queues"] = {CELERY_DEFAULT_QUEUE: {"url": predefined_queue_url}} @@ -422,6 +423,7 @@ def traces_sampler(sampling_context): CELERY_TIMEZONE = "UTC" # apps with celery tasks CELERY_IMPORTS = ["reports.tasks", "notifications.tasks"] +CELERY_TASK_PROTOCOL = 1 CONTRACT_2018_ENABLED = os.environ.get("CONTRACT_2018_ENABLED", "True") == "True" PING_JSON_KEYS["CONTRACT_2018_ENABLED_key"] = "CONTRACT_2018_ENABLED" From 7590550d92e3158f9a353bc5f371e15ca014d777 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Tue, 8 Aug 2023 09:31:53 +0100 Subject: [PATCH 13/22] Remove unused sqs keys as we are using short lived tokens --- helm_deploy/cla-backend/values.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/helm_deploy/cla-backend/values.yaml b/helm_deploy/cla-backend/values.yaml index 04b979a5d..a9704d013 100644 --- a/helm_deploy/cla-backend/values.yaml +++ b/helm_deploy/cla-backend/values.yaml @@ -156,14 +156,6 @@ envVars: value: "True" SQS_REGION: value: "eu-west-2" - SQS_ACCESS_KEY: - secret: - name: sqs - key: access_key_id - SQS_SECRET_KEY: - secret: - name: sqs - key: secret_access_key OBIEE_EMAIL_TO: secret: name: obiee From b672347082fc0be36f9fcd1f79552467426a08ea Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Tue, 8 Aug 2023 09:52:19 +0100 Subject: [PATCH 14/22] Adding service account to worker deployment --- helm_deploy/cla-backend/templates/deployment-worker.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/helm_deploy/cla-backend/templates/deployment-worker.yaml b/helm_deploy/cla-backend/templates/deployment-worker.yaml index 127edb0e9..9dcf00929 100644 --- a/helm_deploy/cla-backend/templates/deployment-worker.yaml +++ b/helm_deploy/cla-backend/templates/deployment-worker.yaml @@ -13,6 +13,7 @@ spec: labels: app: {{ include "cla-backend.fullname" . }}-worker spec: + serviceAccountName: {{ include "cla-backend.serviceAccountName" . }} containers: - name: {{ include "cla-backend.fullname" . }}-worker image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" From a74b713c4dc56d1e0833e7928f74ae0ea31bd030 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Tue, 8 Aug 2023 11:16:21 +0100 Subject: [PATCH 15/22] Kombu versions above 4.1 require celery 4.2 otherwise you will get a AttributeError: async when trying to run worker pods See https://github.com/celery/kombu/issues/873 for more info --- requirements/generated/requirements-dev.txt | 7 ++++--- requirements/generated/requirements-docs.txt | 10 +++++----- requirements/generated/requirements-production.txt | 10 +++++----- requirements/generated/requirements-testing.txt | 10 +++++----- requirements/source/requirements-base.in | 1 + 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/requirements/generated/requirements-dev.txt b/requirements/generated/requirements-dev.txt index bb8d168d6..dc5cf7cf3 100644 --- a/requirements/generated/requirements-dev.txt +++ b/requirements/generated/requirements-dev.txt @@ -152,7 +152,6 @@ idna==2.10 # urllib3 importlib-metadata==2.1.3 # via - # kombu # pre-commit # sqlalchemy # virtualenv @@ -176,8 +175,10 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==4.6.11 - # via celery +kombu==4.1.0 + # via + # -r requirements/source/requirements-base.in + # celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in lxml==4.9.1 diff --git a/requirements/generated/requirements-docs.txt b/requirements/generated/requirements-docs.txt index dca6b63ef..6fc305a78 100644 --- a/requirements/generated/requirements-docs.txt +++ b/requirements/generated/requirements-docs.txt @@ -114,9 +114,7 @@ idna==2.10 imagesize==1.4.1 # via sphinx importlib-metadata==2.1.3 - # via - # kombu - # sqlalchemy + # via sqlalchemy jdcal==1.4.1 # via openpyxl jinja2==2.11.3 @@ -131,8 +129,10 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==4.6.11 - # via celery +kombu==4.1.0 + # via + # -r requirements/source/requirements-base.in + # celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in lxml==4.9.1 diff --git a/requirements/generated/requirements-production.txt b/requirements/generated/requirements-production.txt index baddc9078..ca54033d4 100644 --- a/requirements/generated/requirements-production.txt +++ b/requirements/generated/requirements-production.txt @@ -102,9 +102,7 @@ futures==3.4.0 idna==2.10 # via requests importlib-metadata==2.1.3 - # via - # kombu - # sqlalchemy + # via sqlalchemy jdcal==1.4.1 # via openpyxl jmespath==0.10.0 @@ -117,8 +115,10 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==4.6.11 - # via celery +kombu==4.1.0 + # via + # -r requirements/source/requirements-base.in + # celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in lxml==4.9.1 diff --git a/requirements/generated/requirements-testing.txt b/requirements/generated/requirements-testing.txt index 0a49021eb..b50d70239 100644 --- a/requirements/generated/requirements-testing.txt +++ b/requirements/generated/requirements-testing.txt @@ -131,9 +131,7 @@ idna==2.10 # requests # urllib3 importlib-metadata==2.1.3 - # via - # kombu - # sqlalchemy + # via sqlalchemy ipaddress==1.0.23 # via # cryptography @@ -150,8 +148,10 @@ jsonpatch==1.9 # via -r requirements/source/requirements-base.in jsonpointer==2.3 # via jsonpatch -kombu==4.6.11 - # via celery +kombu==4.1.0 + # via + # -r requirements/source/requirements-base.in + # celery logstash-formatter==0.5.9 # via -r requirements/source/requirements-base.in lxml==4.9.1 diff --git a/requirements/source/requirements-base.in b/requirements/source/requirements-base.in index f1534ded6..f746eab57 100644 --- a/requirements/source/requirements-base.in +++ b/requirements/source/requirements-base.in @@ -41,6 +41,7 @@ polib==1.0.6 # Fork PgFulltext - PR added https://github.com/ministryofjustice/djorm-ext-pgfulltext/archive/refs/tags/0.1.0.tar.gz celery==4.0.2 +kombu==4.1.0 PyYAML==5.3.1 pyminizip==0.2.3 From 60b6638d243e8687800377cf03cc6e7021d9d46b Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:29:39 +0100 Subject: [PATCH 16/22] Install celery[sqs] so that dependencies like pycurl get installed --- Dockerfile | 2 +- cla_backend/settings/base.py | 8 +++++++- requirements/generated/requirements-dev.txt | 6 +++++- requirements/generated/requirements-docs.txt | 6 +++++- requirements/generated/requirements-production.txt | 6 +++++- requirements/generated/requirements-testing.txt | 6 +++++- requirements/source/requirements-base.in | 2 +- 7 files changed, 29 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 77f889170..89f4b397a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ RUN adduser -D app && \ # To install pip dependencies RUN apk add --no-cache \ build-base \ - curl \ + curl curl-dev \ git \ libxml2-dev \ libxslt-dev \ diff --git a/cla_backend/settings/base.py b/cla_backend/settings/base.py index 26e05abb7..b3a37c8a0 100644 --- a/cla_backend/settings/base.py +++ b/cla_backend/settings/base.py @@ -7,6 +7,8 @@ from cla_common.services import CacheAdapter from collections import defaultdict from sentry_sdk.integrations.django import DjangoIntegration +from kombu import transport +from cla_backend.sqs import CLASQSChannel def env_var_truthy_intention(name): @@ -401,10 +403,14 @@ def traces_sampler(sampling_context): } if os.environ.get("CELERY_PREDEFINED_QUEUE_URL"): + # Monkey patch the SQS transport channel to use our channel + # This is to stop actions such as ListQueues being triggered + # which we do not have on the cloud platform environments + transport.SQS.Transport.Channel = CLASQSChannel BROKER_URL = "sqs://" predefined_queue_url = os.environ.get("CELERY_PREDEFINED_QUEUE_URL") CELERY_DEFAULT_QUEUE = predefined_queue_url.split("/")[-1] - BROKER_TRANSPORT_OPTIONS["predefined_queues"] = {CELERY_DEFAULT_QUEUE: {"url": predefined_queue_url}} + BROKER_TRANSPORT_OPTIONS["predefined_queue_url"] = predefined_queue_url else: # if no BROKER_URL specified then don't try to use celery # because it'll just cause errors diff --git a/requirements/generated/requirements-dev.txt b/requirements/generated/requirements-dev.txt index dc5cf7cf3..f882716b2 100644 --- a/requirements/generated/requirements-dev.txt +++ b/requirements/generated/requirements-dev.txt @@ -14,11 +14,13 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in +boto==2.49.0 + # via celery botocore==1.20.112 # via # boto3 # s3transfer -celery==4.0.2 +celery[sqs]==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -220,6 +222,8 @@ psycopg2==2.7.5 # via -r requirements/source/requirements-base.in pycparser==2.21 # via cffi +pycurl==7.43.0.5 + # via celery pyjwt==1.7.1 # via notifications-python-client pyminizip==0.2.3 diff --git a/requirements/generated/requirements-docs.txt b/requirements/generated/requirements-docs.txt index 6fc305a78..cd634ab5c 100644 --- a/requirements/generated/requirements-docs.txt +++ b/requirements/generated/requirements-docs.txt @@ -16,11 +16,13 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in +boto==2.49.0 + # via celery botocore==1.20.112 # via # boto3 # s3transfer -celery==4.0.2 +celery[sqs]==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -161,6 +163,8 @@ polib==1.0.6 # via -r requirements/source/requirements-base.in psycopg2==2.7.5 # via -r requirements/source/requirements-base.in +pycurl==7.43.0.5 + # via celery pygments==2.5.2 # via sphinx pyjwt==1.7.1 diff --git a/requirements/generated/requirements-production.txt b/requirements/generated/requirements-production.txt index ca54033d4..e79ed588e 100644 --- a/requirements/generated/requirements-production.txt +++ b/requirements/generated/requirements-production.txt @@ -12,11 +12,13 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in +boto==2.49.0 + # via celery botocore==1.20.112 # via # boto3 # s3transfer -celery==4.0.2 +celery[sqs]==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -143,6 +145,8 @@ polib==1.0.6 # via -r requirements/source/requirements-base.in psycopg2==2.7.5 # via -r requirements/source/requirements-base.in +pycurl==7.43.0.5 + # via celery pyjwt==1.7.1 # via notifications-python-client pyminizip==0.2.3 diff --git a/requirements/generated/requirements-testing.txt b/requirements/generated/requirements-testing.txt index b50d70239..daddf5f4f 100644 --- a/requirements/generated/requirements-testing.txt +++ b/requirements/generated/requirements-testing.txt @@ -12,11 +12,13 @@ bleach==3.3.0 # via -r requirements/source/requirements-base.in boto3==1.17.112 # via -r requirements/source/requirements-base.in +boto==2.49.0 + # via celery botocore==1.20.112 # via # boto3 # s3transfer -celery==4.0.2 +celery[sqs]==4.0.2 # via -r requirements/source/requirements-base.in certifi==2021.10.8 # via @@ -182,6 +184,8 @@ psycopg2==2.7.5 # via -r requirements/source/requirements-base.in pycparser==2.21 # via cffi +pycurl==7.43.0.5 + # via celery pyjwt==1.7.1 # via notifications-python-client pyminizip==0.2.3 diff --git a/requirements/source/requirements-base.in b/requirements/source/requirements-base.in index f746eab57..620ec1c17 100644 --- a/requirements/source/requirements-base.in +++ b/requirements/source/requirements-base.in @@ -40,7 +40,7 @@ polib==1.0.6 # Fork PgFulltext - PR added https://github.com/ministryofjustice/djorm-ext-pgfulltext/archive/refs/tags/0.1.0.tar.gz -celery==4.0.2 +celery[sqs]==4.0.2 kombu==4.1.0 PyYAML==5.3.1 pyminizip==0.2.3 From f6261c789d62a550a8c7652c8fd3ef9a274872cc Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:46:42 +0100 Subject: [PATCH 17/22] Adding steps to install ibcurl4-gnutls-dev because it is required for pycurl to the pipeline --- .circleci/config.yml | 6 ++++++ cla_backend/libs/aws/s3.py | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 46c3312f1..4ff7f4156 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -97,6 +97,9 @@ jobs: - run: name: Setup Python environment command: | + echo "Installing ibcurl4-gnutls-dev because it is required for pycurl" + sudo apt-get update + sudo apt-get install -y libcurl4-gnutls-dev pip install virtualenv virtualenv pip-compile-env - restore_cache: @@ -137,6 +140,9 @@ jobs: - run: name: Setup Python environment command: | + echo "Installing ibcurl4-gnutls-dev because it is required for pycurl" + sudo apt-get update + sudo apt-get install -y libcurl4-gnutls-dev sudo apt-get update && sudo apt-get install -y libpython2.7 firefox pip install virtualenv virtualenv env diff --git a/cla_backend/libs/aws/s3.py b/cla_backend/libs/aws/s3.py index 1f3a4c54e..095e0a281 100644 --- a/cla_backend/libs/aws/s3.py +++ b/cla_backend/libs/aws/s3.py @@ -19,16 +19,17 @@ def get_s3_connection(cls, bucket_name): @classmethod def download_file(cls, bucket_name, key): try: - obj = cls.get_s3_connection(bucket_name).bucket.Object(key.strip("/")) + obj = cls.get_s3_connection(bucket_name).bucket.Object(key) data = NamedTemporaryFile() obj.download_fileobj(data) + data.seek(0) return {"headers": {"Content-Type": obj.content_type}, "body": data} except ClientError: return None @classmethod def save_file(cls, bucket_name, key, path): - cls.get_s3_connection(bucket_name).save(key, open(path, "rb")) + cls.get_s3_connection(bucket_name).bucket.Object(key).upload_file(path) @classmethod def delete_file(cls, bucket_name, key): From 0bd09384d568706b0d8789239ec208733cab7250 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Wed, 9 Aug 2023 08:45:25 +0100 Subject: [PATCH 18/22] Manually register reason for contacting export task --- cla_backend/apps/reports/tasks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cla_backend/apps/reports/tasks.py b/cla_backend/apps/reports/tasks.py index 25b17fe1e..dc64f75be 100644 --- a/cla_backend/apps/reports/tasks.py +++ b/cla_backend/apps/reports/tasks.py @@ -153,10 +153,15 @@ def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): pass +# The Task base class no longer automatically register tasks +# https://docs.celeryq.dev/en/v4.0.0/whatsnew-4.0.html#the-task-base-class-no-longer-automatically-register-tasks +# https://github.com/celery/celery/issues/5992 app.tasks.register(OBIEEExportTask()) class ReasonForContactingExportTask(ExportTaskBase): + name = "reasonforcontactingexport" + def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): """ Export csv files for each of the referrers from reason for contacting @@ -228,3 +233,9 @@ def generate_rfc_zip(self): with ZipFile(self.filepath, "w") as refer_zip: for csv_file in glob.glob("*.csv"): refer_zip.write(csv_file) + + +# The Task base class no longer automatically register tasks +# https://docs.celeryq.dev/en/v4.0.0/whatsnew-4.0.html#the-task-base-class-no-longer-automatically-register-tasks +# https://github.com/celery/celery/issues/5992 +app.tasks.register(ReasonForContactingExportTask()) From 7e00724d50af8b85a77d56f1cb95364c19f682c0 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Wed, 9 Aug 2023 15:24:53 +0100 Subject: [PATCH 19/22] Set name of default export task --- cla_backend/apps/reports/tasks.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/cla_backend/apps/reports/tasks.py b/cla_backend/apps/reports/tasks.py index dc64f75be..6f3f7fb69 100644 --- a/cla_backend/apps/reports/tasks.py +++ b/cla_backend/apps/reports/tasks.py @@ -87,6 +87,8 @@ def send_to_s3(self): class ExportTask(ExportTaskBase): + name = "exporttask" + def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): self.user = User.objects.get(pk=user_id) self._create_export() @@ -153,12 +155,6 @@ def run(self, user_id, filename, form_class_name, post_data, *args, **kwargs): pass -# The Task base class no longer automatically register tasks -# https://docs.celeryq.dev/en/v4.0.0/whatsnew-4.0.html#the-task-base-class-no-longer-automatically-register-tasks -# https://github.com/celery/celery/issues/5992 -app.tasks.register(OBIEEExportTask()) - - class ReasonForContactingExportTask(ExportTaskBase): name = "reasonforcontactingexport" @@ -238,4 +234,6 @@ def generate_rfc_zip(self): # The Task base class no longer automatically register tasks # https://docs.celeryq.dev/en/v4.0.0/whatsnew-4.0.html#the-task-base-class-no-longer-automatically-register-tasks # https://github.com/celery/celery/issues/5992 -app.tasks.register(ReasonForContactingExportTask()) +tasks = [ExportTask, OBIEEExportTask(), ReasonForContactingExportTask()] +for task in tasks: + app.tasks.register(task) From 6930fd9b7fadd928e421255526a3121f6a51c136 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Tue, 8 Aug 2023 11:16:21 +0100 Subject: [PATCH 20/22] Kombu versions above 4.1 require celery 4.2 otherwise you will get a AttributeError: async when trying to run worker pods See https://github.com/celery/kombu/issues/873 for more info --- requirements/generated/requirements-dev.txt | 2 +- requirements/generated/requirements-docs.txt | 2 +- requirements/generated/requirements-production.txt | 2 +- requirements/generated/requirements-testing.txt | 2 +- requirements/source/requirements-base.in | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements/generated/requirements-dev.txt b/requirements/generated/requirements-dev.txt index f882716b2..68ee35d73 100644 --- a/requirements/generated/requirements-dev.txt +++ b/requirements/generated/requirements-dev.txt @@ -244,7 +244,7 @@ pytz==2021.1 # -r requirements/source/requirements-base.in # celery # cla-common -pyyaml==5.3.1 +pyyaml==5.4 # via # -r requirements/source/requirements-base.in # aspy.yaml diff --git a/requirements/generated/requirements-docs.txt b/requirements/generated/requirements-docs.txt index cd634ab5c..ccc3c468b 100644 --- a/requirements/generated/requirements-docs.txt +++ b/requirements/generated/requirements-docs.txt @@ -185,7 +185,7 @@ pytz==2021.1 # babel # celery # cla-common -pyyaml==5.3.1 +pyyaml==5.4 # via -r requirements/source/requirements-base.in requests==2.27.1 # via diff --git a/requirements/generated/requirements-production.txt b/requirements/generated/requirements-production.txt index e79ed588e..4f384be69 100644 --- a/requirements/generated/requirements-production.txt +++ b/requirements/generated/requirements-production.txt @@ -164,7 +164,7 @@ pytz==2021.1 # -r requirements/source/requirements-base.in # celery # cla-common -pyyaml==5.3.1 +pyyaml==5.4 # via -r requirements/source/requirements-base.in requests==2.27.1 # via diff --git a/requirements/generated/requirements-testing.txt b/requirements/generated/requirements-testing.txt index daddf5f4f..cafedc835 100644 --- a/requirements/generated/requirements-testing.txt +++ b/requirements/generated/requirements-testing.txt @@ -206,7 +206,7 @@ pytz==2021.1 # -r requirements/source/requirements-base.in # celery # cla-common -pyyaml==5.3.1 +pyyaml==5.4 # via -r requirements/source/requirements-base.in requests==2.27.1 # via diff --git a/requirements/source/requirements-base.in b/requirements/source/requirements-base.in index 620ec1c17..27db4a830 100644 --- a/requirements/source/requirements-base.in +++ b/requirements/source/requirements-base.in @@ -42,7 +42,7 @@ polib==1.0.6 https://github.com/ministryofjustice/djorm-ext-pgfulltext/archive/refs/tags/0.1.0.tar.gz celery[sqs]==4.0.2 kombu==4.1.0 -PyYAML==5.3.1 +PyYAML==5.4 pyminizip==0.2.3 #Irat healthcheck and ping package From 29883e6ea79a2305dbd2667dd593ccdb4e298580 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Thu, 10 Aug 2023 10:36:34 +0100 Subject: [PATCH 21/22] Clean s3 file name and remove trailing slash --- cla_backend/libs/aws/s3.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cla_backend/libs/aws/s3.py b/cla_backend/libs/aws/s3.py index 095e0a281..d3604e018 100644 --- a/cla_backend/libs/aws/s3.py +++ b/cla_backend/libs/aws/s3.py @@ -12,6 +12,10 @@ class StaticS3Storage(S3Boto3Storage): class ReportsS3: + @classmethod + def clean_name(cls, name): + return name.strip("/") + @classmethod def get_s3_connection(cls, bucket_name): return S3Boto3Storage(bucket=bucket_name) @@ -19,7 +23,7 @@ def get_s3_connection(cls, bucket_name): @classmethod def download_file(cls, bucket_name, key): try: - obj = cls.get_s3_connection(bucket_name).bucket.Object(key) + obj = cls.get_s3_connection(bucket_name).bucket.Object(cls.clean_name(key)) data = NamedTemporaryFile() obj.download_fileobj(data) data.seek(0) @@ -29,11 +33,11 @@ def download_file(cls, bucket_name, key): @classmethod def save_file(cls, bucket_name, key, path): - cls.get_s3_connection(bucket_name).bucket.Object(key).upload_file(path) + cls.get_s3_connection(bucket_name).bucket.Object(cls.clean_name(key)).upload_file(path) @classmethod def delete_file(cls, bucket_name, key): - cls.get_s3_connection(bucket_name).delete(key) + cls.get_s3_connection(bucket_name).delete(cls.clean_name(key)) @classmethod def save_data_to_bucket(cls, bucket_name, key, content): From 1e2cfa9bc0aa55ecad05c2668a892dabd4f7f5b7 Mon Sep 17 00:00:00 2001 From: said-moj <45761276+said-moj@users.noreply.github.com> Date: Wed, 16 Aug 2023 11:48:59 +0100 Subject: [PATCH 22/22] Add comment for why we need to seek the download to the start --- cla_backend/libs/aws/s3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cla_backend/libs/aws/s3.py b/cla_backend/libs/aws/s3.py index d3604e018..721e4d3e5 100644 --- a/cla_backend/libs/aws/s3.py +++ b/cla_backend/libs/aws/s3.py @@ -26,6 +26,8 @@ def download_file(cls, bucket_name, key): obj = cls.get_s3_connection(bucket_name).bucket.Object(cls.clean_name(key)) data = NamedTemporaryFile() obj.download_fileobj(data) + # This required otherwise any file reads will start at the end which + # leads to an empty file being downloaded (zero bytes) data.seek(0) return {"headers": {"Content-Type": obj.content_type}, "body": data} except ClientError: