Skip to content

Commit

Permalink
feat: backend to supports multiple aws s3 buckets
Browse files Browse the repository at this point in the history
  • Loading branch information
Nekenhei committed Nov 6, 2024
1 parent 5e57ab6 commit d9a704f
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 19 deletions.
24 changes: 24 additions & 0 deletions docs/001-custom-features.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,27 @@ This feature allows CCX coaches (ccx_coach in the course access role model) and
- Find the assignment and grade it.
- After grading the assignment, it should not be necessary to approve the grade.
- Go to the progress page, with the student user and check if the grade is assigned to the unit containing the SGA component.

## AWS S3 Storage feature

### Context

The SGA Xblock in its upstream repository approaches the storage capability by relaying on the Django's default storage
that is defined in the edx platform. This feature adds the capability to define whether to use the Django storage
(which is set by default and does not require configurations), or define a custom AWS S3 bucket to store the media
objects processed through the SGA Xblock. This feature is site-aware and could be define in the Admin portal of the
platform.

### Feature

Set the AWS S3 bucket to the Xblock through the Site configurations using the following format:

"aws_s3_storage_data": {
"aws_s3_access_key": "yours_access_key",
"aws_s3_secret_key": "yours_secret_key",
"aws_s3_bucket_name": "AWS_s3_bucket_name",
"aws_s3_region_name": "yours_aws_region_name"
}

Once it has been done, the SGA xblocks for the given site will manage the media objects with the AWS S3 bucket.
If no settings are defined, the Xblock would use the defrault Django storage.
40 changes: 40 additions & 0 deletions edx_sga/backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from storages.backends.s3boto3 import S3Boto3Storage
from django.core.files.storage import default_storage
from openedx.core.djangoapps.site_configuration import helpers
from edx_sga.constants import AWS_S3_FIELDS


class StaffGradedAssignmentStorage:
def __init__(self):
aws_s3_bucket = helpers.get_current_site_configuration().get_value("aws_s3_storage_data", '')
self.aws_s3_storage = None

if aws_s3_bucket:
self.check_s3_bucket_keys(aws_s3_bucket)
self.aws_s3_storage = S3Boto3Storage(
access_key=aws_s3_bucket.get('aws_s3_access_key'),
secret_key=aws_s3_bucket.get('aws_s3_secret_key'),
bucket_name=aws_s3_bucket.get('aws_s3_bucket_name'),
region_name=aws_s3_bucket.get('aws_s3_region_name')
)

def check_s3_bucket_keys(self, settings_dict):
"""
Method checks integrity and structure of the settings given.
"""
if not isinstance(settings_dict, dict):
raise ValueError("Wrong storage settings definition. Expected dict.")

for aws_s3_field in AWS_S3_FIELDS:
if aws_s3_field not in settings_dict.keys() or not settings_dict[aws_s3_field]:
raise ValueError(f"Error on storage settings with field '{aws_s3_field}'.")

for value, key in settings_dict.items():
if not value and isinstance(value, str):
raise ValueError(f"Value error on storage Settings: {key}.")

def sga_storage(self):
"""
Returns the storage to be used wheter the default Django's storage or AWS S3 Bucket
"""
return self.aws_s3_storage if self.aws_s3_storage else default_storage
7 changes: 6 additions & 1 deletion edx_sga/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

BLOCK_SIZE = 2 ** 10 * 8 # 8kb
ITEM_TYPE = "sga"

AWS_S3_FIELDS = [
'aws_s3_access_key',
'aws_s3_secret_key',
'aws_s3_bucket_name',
'aws_s3_region_name'
]

class ShowAnswer:
"""
Expand Down
31 changes: 21 additions & 10 deletions edx_sga/sga.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from django.conf import settings
from django.core.exceptions import PermissionDenied
from django.core.files import File
from django.core.files.storage import default_storage
from django.template import Context, Template
from django.utils.encoding import force_text
from django.utils.timezone import now as django_now
Expand All @@ -40,6 +39,7 @@
from xmodule.contentstore.content import StaticContent
from xmodule.util.duedate import get_extended_due_date

from edx_sga.backends import StaffGradedAssignmentStorage
from edx_sga.constants import ITEM_TYPE
from edx_sga.showanswer import ShowAnswerXBlockMixin
from edx_sga.tasks import get_zip_file_name, get_zip_file_path, zip_student_submissions
Expand Down Expand Up @@ -160,6 +160,14 @@ class StaffGradedAssignmentXBlock(
help=_("When the annotated file was uploaded"),
)

@property
def xblock_storage(self):
"""
Defines the storage to be used in this Xblock instance.
"""

return StaffGradedAssignmentStorage().sga_storage()

@classmethod
def student_upload_max_size(cls):
"""
Expand Down Expand Up @@ -283,10 +291,11 @@ def upload_assignment(self, request, suffix=""):
path,
user.username,
)
if default_storage.exists(path):
xblock_storage = self.xblock_storage
if xblock_storage.exists(path):
# save latest submission
default_storage.delete(path)
default_storage.save(path, File(upload.file))
xblock_storage.delete(path)
xblock_storage.save(path, File(upload.file))
return Response(json_body=self.student_state())

@XBlock.handler
Expand Down Expand Up @@ -328,8 +337,9 @@ def staff_upload_annotated(self, request, suffix=""):
state["annotated_mimetype"] = mimetypes.guess_type(upload.file.name)[0]
state["annotated_timestamp"] = utcnow().strftime(DateTime.DATETIME_FORMAT)
path = self.file_storage_path(sha1, filename)
if not default_storage.exists(path):
default_storage.save(path, File(upload.file))
xblock_storage = self.xblock_storage
if not xblock_storage.exists(path):
xblock_storage.save(path, File(upload.file))
module.state = json.dumps(state)
module.save()
log.info(
Expand Down Expand Up @@ -620,8 +630,9 @@ def clear_student_state(self, *args, **kwargs):
submission_file_path = self.file_storage_path(
submission_file_sha1, submission_filename
)
if default_storage.exists(submission_file_path):
default_storage.delete(submission_file_path)
xblock_storage = self.xblock_storage
if xblock_storage.exists(submission_file_path):
xblock_storage.delete(submission_file_path)
submissions_api.reset_score(
student_id, self.block_course_id, self.block_id, clear_state=True
)
Expand Down Expand Up @@ -977,7 +988,7 @@ def is_zip_file_available(self, user):
zip_file_path = get_zip_file_path(
user.username, self.block_course_id, self.block_id, self.location
)
return default_storage.exists(zip_file_path)
return self.xblock_storage.exists(zip_file_path)

def count_archive_files(self, user):
"""
Expand All @@ -987,7 +998,7 @@ def count_archive_files(self, user):
zip_file_path = get_zip_file_path(
user.username, self.block_course_id, self.block_id, self.location
)
with default_storage.open(zip_file_path, "rb") as zip_file:
with self.xblock_storage.open(zip_file_path, "rb") as zip_file:
with closing(ZipFile(zip_file)) as archive:
return len(archive.infolist())

Expand Down
12 changes: 7 additions & 5 deletions edx_sga/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
import tempfile
import zipfile

from django.core.files.storage import default_storage
from celery import shared_task
from opaque_keys.edx.locator import BlockUsageLocator
from common.djangoapps.student.models import user_by_anonymous_id
from submissions import api as submissions_api

from edx_sga.backends import StaffGradedAssignmentStorage
from edx_sga.constants import ITEM_TYPE
from edx_sga.utils import get_file_storage_path, is_finalized_submission

Expand Down Expand Up @@ -73,7 +73,8 @@ def _compress_student_submissions(zip_file_path, block_id, course_id, locator):
student_username,
submission_file_path,
)
with default_storage.open(
xblock_storage = StaffGradedAssignmentStorage().sga_storage()
with xblock_storage.open(
submission_file_path, "rb"
) as destination_file:
filename_in_zip = f"{student_username}_{os.path.basename(submission_file_path)}"
Expand All @@ -82,7 +83,7 @@ def _compress_student_submissions(zip_file_path, block_id, course_id, locator):
tmp.seek(0)
# Write the bytes of the in-memory zip file to an actual file
log.info("Moving zip file from memory to storage at path: %s ", zip_file_path)
default_storage.save(zip_file_path, tmp)
xblock_storage.save(zip_file_path, tmp)


@shared_task
Expand All @@ -99,9 +100,10 @@ def zip_student_submissions(course_id, block_id, locator_unicode, username):
locator = BlockUsageLocator.from_string(locator_unicode)
zip_file_path = get_zip_file_path(username, course_id, block_id, locator)
log.info("Creating zip file for course: %s at path: %s", locator, zip_file_path)
if default_storage.exists(zip_file_path):
xblock_storage = StaffGradedAssignmentStorage().sga_storage()
if xblock_storage.exists(zip_file_path):
log.info("Deleting already-existing zip file at path: %s", zip_file_path)
default_storage.delete(zip_file_path)
xblock_storage.delete(zip_file_path)
_compress_student_submissions(zip_file_path, block_id, course_id, locator)


Expand Down
124 changes: 124 additions & 0 deletions edx_sga/tests/test_backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@

import unittest
from unittest.mock import MagicMock, patch
from django.core.files.storage import default_storage
from edx_sga.backends import StaffGradedAssignmentStorage


class StaffGradedAssignmentStorageTest(unittest.TestCase):

@patch('edx_sga.backends.S3Boto3Storage')
@patch('edx_sga.backends.helpers')
def test_returns_s3_storage_storage_settings_given(self, mock_site_helpers, mock_s3_aws_storage):
"""
Test StaffGradedAssignmentStorage to return a valid S3 storage when the bucket data is given
"""
storage_settings = {
'aws_s3_access_key': 'access_key',
'aws_s3_secret_key': 'secret_key',
'aws_s3_bucket_name': 'bucket_name',
'aws_s3_region_name': 'region_name'
}
mock_site_configuration = MagicMock()
mock_site_configuration.get_value.return_value = storage_settings
mock_site_helpers.get_current_site_configuration.return_value = mock_site_configuration

xblock_storage = StaffGradedAssignmentStorage().sga_storage()

mock_s3_aws_storage.assert_called_with(
access_key='access_key',
secret_key='secret_key',
bucket_name='bucket_name',
region_name='region_name'
)

self.assertEqual(xblock_storage, mock_s3_aws_storage.return_value)

@patch('edx_sga.backends.helpers')
def test_returns_default_storage_storage_settings_no_given(self, mock_site_helpers):
"""
Test StaffGradedAssignmentStorage to return the default Django's storage when no settings are defined
"""
mock_site_configuration = MagicMock()
mock_site_configuration.get_value.return_value = None
mock_site_helpers.get_current_site_configuration.return_value = mock_site_configuration

xblock_storage = StaffGradedAssignmentStorage().sga_storage()
self.assertEqual(xblock_storage, default_storage)

@patch('edx_sga.backends.helpers')
def test_check_s3_bucket_proper_values(self, mock_site_helpers):
"""
Checks for check_s3_bucket_keys method to check and return the data given in the format expected.
"""
storage_settings = {
'aws_s3_access_key': 'access_key',
'aws_s3_secret_key': 'secret_key',
'aws_s3_bucket_name': 'bucket_name',
'aws_s3_region_name': 'region_name'
}
mock_site_configuration = MagicMock()
mock_site_configuration.get_value.return_value = storage_settings
mock_site_helpers.get_current_site_configuration.return_value = mock_site_configuration

try:
xblock_storage_instance = StaffGradedAssignmentStorage()
xblock_storage_instance.check_s3_bucket_keys(storage_settings)
except ValueError:
self.fail('Unexpected raise from check_s3_bucket_keys method.')

@patch('edx_sga.backends.helpers')
def test_check_s3_bucket_incomplete_values(self, mock_site_helpers):
"""
Checks for check_s3_bucket_keys method to raises an exception when the data passed doesn't fullfils the needs.
"""
storage_settings = {
'aws_s3_access_key': 'access_key',
'aws_s3_secret_key': 'secret_key',
}
mock_site_configuration = MagicMock()
mock_site_configuration.get_value.return_value = storage_settings
mock_site_helpers.get_current_site_configuration.return_value = mock_site_configuration


with self.assertRaises(ValueError) as exception:
StaffGradedAssignmentStorage()
self.assertIn("Error on storage settings with field 'aws_s3_bucket_name'", str(exception.exception))

@patch('edx_sga.backends.helpers')
def test_sga_storage_returns_defined_storage(self, mock_site_helpers):
"""
Test for StaffGradedAssignmentStorage to have assigned the expected storage when the data is passed
"""
mock_aws_s3_storage = MagicMock(name='MockS3Storage')
storage_settings = {
'aws_s3_access_key': 'access_key',
'aws_s3_secret_key': 'secret_key',
'aws_s3_bucket_name': 'bucket_name',
'aws_s3_region_name': 'region_name'
}
mock_site_configuration = MagicMock()
mock_site_configuration.get_value.return_value = storage_settings
mock_site_helpers.get_current_site_configuration.return_value = mock_site_configuration

sga_storage_instance = StaffGradedAssignmentStorage()
sga_storage_instance.aws_s3_storage = mock_aws_s3_storage

result = sga_storage_instance.sga_storage()
self.assertEqual(result, mock_aws_s3_storage)

@patch('edx_sga.backends.default_storage')
@patch('edx_sga.backends.helpers')
def test_sga_storage_returns_default_storage(self, mock_site_helpers, mock_default_storage):
"""
Test for StaffGradedAssignmentStorage to uses the Django's default storage.
"""
mock_site_configuration = MagicMock()
mock_site_configuration.get_value.return_value = None
mock_site_helpers.get_current_site_configuration.return_value = mock_site_configuration

sga_storage_instance = StaffGradedAssignmentStorage()
sga_storage_instance.aws_s3_storage = None

result = sga_storage_instance.sga_storage()
self.assertEqual(result, mock_default_storage)
6 changes: 3 additions & 3 deletions edx_sga/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import pytz
from django.conf import settings
from django.core.files.storage import default_storage
from edx_sga.backends import StaffGradedAssignmentStorage
from edx_sga.constants import BLOCK_SIZE


Expand Down Expand Up @@ -42,7 +42,7 @@ def get_file_modified_time_utc(file_path):
else pytz.utc
)

file_time = default_storage.get_modified_time(file_path)
file_time = StaffGradedAssignmentStorage().sga_storage().get_modified_time(file_path)

if file_time.tzinfo is None:
return file_timezone.localize(file_time).astimezone(pytz.utc)
Expand Down Expand Up @@ -74,5 +74,5 @@ def file_contents_iter(file_path):
"""
Returns an iterator over the contents of a file located at the given file path
"""
file_descriptor = default_storage.open(file_path)
file_descriptor = StaffGradedAssignmentStorage().sga_storage().open(file_path)
return iter(partial(file_descriptor.read, BLOCK_SIZE), b"")

0 comments on commit d9a704f

Please sign in to comment.