feat: Add codejail_service app for transition to containerized codejail

edx · Jan 9, 2024 · 934e7ee · 934e7ee
1 parent 6718284
commit 934e7ee
Show file tree

Hide file tree

Showing 17 changed files with 878 additions and 65 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -14,6 +14,12 @@ Change Log
 Unreleased
 ~~~~~~~~~~
 
+[3.2.0] - 2024-01-09
+~~~~~~~~~~~~~~~~~~~~
+Added
+_____
+* Add ``codejail_service`` app for transition to containerized codejail
+
 [3.1.1] - 2023-11-06
 ~~~~~~~~~~~~~~~~~~~~
 Fixed

diff --git a/edx_arch_experiments/__init__.py b/edx_arch_experiments/__init__.py
@@ -2,4 +2,4 @@
 A plugin to include applications under development by the architecture team at 2U.
 """
 
-__version__ = '3.1.1'
+__version__ = '3.2.0'
diff --git a/edx_arch_experiments/codejail_service/README.rst b/edx_arch_experiments/codejail_service/README.rst
@@ -0,0 +1,27 @@
+Codejail Service
+################
+
+When installed in the LMS as a plugin app, the ``codejail_service`` app allows the CMS to delegate codejail executions to the LMS across the network.
+
+This is intended as a `temporary situation <https://github.com/openedx/edx-platform/issues/33538>`_ with the following goals:
+
+- Unblock containerization of the CMS. Codejail cannot be readily containerized due to its reliance on AppArmor, but if codejail execution is outsourced, then we can containerize CMS first and will be in a better position to containerize the LMS afterwards.
+- Exercise the remote-codejail pathway and have an opportunity to discover and implement needed improvements before fully building out a separate, dedicated codejail service.
+
+Setup
+*****
+
+In LMS:
+
+- Install ``edx-arch-experiments`` as a dependency
+- Identify a service account that will be permitted to make calls to the codejail service and ensure it has the ``is_staff`` Django flag. In devstack, this would be ``cms_worker``.
+- In Django admin, under ``Django OAuth Toolkit > Applications``, find or create a Client Credentials application for that service user.
+
+In CMS:
+
+- Set ``ENABLE_CODEJAIL_REST_SERVICE`` to ``True``
+- Set ``CODE_JAIL_REST_SERVICE_HOST`` to the URL origin of the LMS (e.g. ``http://edx.devstack.lms:18000`` in devstack)
+- Keep ``CODE_JAIL_REST_SERVICE_REMOTE_EXEC`` at its default of ``xmodule.capa.safe_exec.remote_exec.send_safe_exec_request_v0``
+- Adjust ``CODE_JAIL_REST_SERVICE_CONNECT_TIMEOUT`` and ``CODE_JAIL_REST_SERVICE_READ_TIMEOUT`` if needed
+- Set ``CODE_JAIL_REST_SERVICE_OAUTH_URL`` to the LMS OAuth endpoint (e.g. ``http://edx.devstack.lms:18000`` in devstack)
+- Set ``CODE_JAIL_REST_SERVICE_CLIENT_ID`` and ``CODE_JAIL_REST_SERVICE_CLIENT_SECRET`` to the client credentials app ID and secret that you identified in the LMS. (In devstack, these would be ``cms-backend-service-key`` and ``cms-backend-service-secret``.)
diff --git a/edx_arch_experiments/codejail_service/__init__.py b/edx_arch_experiments/codejail_service/__init__.py
diff --git a/edx_arch_experiments/codejail_service/apps.py b/edx_arch_experiments/codejail_service/apps.py
@@ -0,0 +1,22 @@
+"""
+App for running answer submissions inside of codejail.
+"""
+
+from django.apps import AppConfig
+from edx_django_utils.plugins.constants import PluginURLs
+
+
+class CodejailService(AppConfig):
+    """
+    App config that registers this app's URLs (namespace ``codejail_service``) as an LMS plugin.
+    """
+    name = 'edx_arch_experiments.codejail_service'
+
+    plugin_app = {
+        PluginURLs.CONFIG: {
+            'lms.djangoapp': {
+                PluginURLs.NAMESPACE: 'codejail_service',
+                PluginURLs.RELATIVE_PATH: 'urls',  # .urls is the default place for plugin views
+            }
+        },
+    }
diff --git a/edx_arch_experiments/codejail_service/urls.py b/edx_arch_experiments/codejail_service/urls.py
@@ -0,0 +1,11 @@
+"""
+Codejail service URLs.
+"""
+
+from django.urls import path
+
+from . import views
+
+urlpatterns = [
+    path('api/v0/code-exec', views.code_exec_view_v0, name="code_exec_v0"),  # v0 remote code-exec API
+]
diff --git a/edx_arch_experiments/codejail_service/views.py b/edx_arch_experiments/codejail_service/views.py
@@ -0,0 +1,153 @@
+"""
+Codejail service API.
+"""
+
+import json
+import logging
+from copy import deepcopy
+
+import jsonschema
+from codejail.safe_exec import SafeExecException, safe_exec
+from django.conf import settings
+from edx_toggles.toggles import SettingToggle
+from rest_framework.decorators import api_view, parser_classes, permission_classes
+from rest_framework.parsers import FormParser, MultiPartParser
+from rest_framework.permissions import IsAdminUser
+from rest_framework.response import Response
+
+log = logging.getLogger(__name__)
+
+# .. toggle_name: CODEJAIL_SERVICE_ENABLED
+# .. toggle_implementation: SettingToggle
+# .. toggle_default: True
+# .. toggle_description: If True, codejail execution calls will be accepted over the network,
+#   allowing this IDA to act as a codejail service for another IDA.
+# .. toggle_use_cases: circuit_breaker
+# .. toggle_creation_date: 2023-12-21
+# .. toggle_tickets: https://github.com/openedx/edx-platform/issues/33538
+CODEJAIL_SERVICE_ENABLED = SettingToggle('CODEJAIL_SERVICE_ENABLED', default=True, module_name=__name__)
+
+# JSON Schema for the decoded 'payload' form field of the v0 API; only 'code' is required.
+payload_schema = {
+    'type': 'object',
+    'properties': {
+        'code': {'type': 'string'},
+        'globals_dict': {'type': 'object'},
+        # The fields below that use 'anyOf' accept null as well as their
+        # normal type, because edx-platform appears to currently send None
+        # for some of them (rather than omitting the keys).
+        'python_path': {
+            'anyOf': [
+                {
+                    'type': 'array',
+                    'items': {'type': 'string'},
+                },
+                {'type': 'null'},
+            ],
+        },
+        'limit_overrides_context': {
+            'anyOf': [
+                {'type': 'string'},
+                {'type': 'null'},
+            ],
+        },
+        'slug': {
+            'anyOf': [
+                {'type': 'string'},
+                {'type': 'null'},
+            ],
+        },
+        'unsafely': {'type': 'boolean'},
+    },
+    'required': ['code'],
+}
+
+# A note on the authorization model used here:
+#
+# We really just need one service account to be able to call this, and
+# then also allow is_staff to call it for convenience and debugging
+# purposes.
+#
+# To do this "right", I'd probably have to create an empty abstract
+# model, create a permission on it, create a group, add the permission
+# to the group, and add the service account to the group. Then I could
+# check the calling user's has_perm. If I wanted to use bridgekeeper
+# (as we're trying to do more of) I might be able to give bridgekeeper
+# a `@blanket_rule` that checks membership in the group, then use
+# bridgekeeper here instead of checking permissions directly, but it's
+# possible this wouldn't work because bridgekeeper might require there
+# to be a model instance to pass in (and there wouldn't be one, since
+# it's just an abstract model.)
+#
+# But... given that the service account will be is_staff, and I'll be
+# opening this up to is_staff alongside the intended service account,
+# and this is already a hacky intermediate solution... we can just use
+# the `IsAdminUser` permission class and be done with it.
+
+
+@api_view(['POST'])
+@parser_classes([FormParser, MultiPartParser])
+@permission_classes([IsAdminUser])
+def code_exec_view_v0(request):
+    """
+    Executes code in a codejail sandbox for a remote caller.
+
+    This implements the API used by edxapp's xmodule.capa.safe_exec.remote_exec.
+    It accepts a POST of a form containing a `payload` value and zero or more
+    extra files. The payload is JSON and contains the parameters to be sent to
+    codejail's safe_exec (aside from `extra_files`); see payload_schema.
+
+    Responds with `{'globals_dict': ...}` on success, or `{'emsg': ...}` if
+    safe_exec raised SafeExecException. This API does not permit
+    `unsafely=true`.
+    """
+    if not CODEJAIL_SERVICE_ENABLED.is_enabled():
+        return Response("Codejail service not enabled", status=500)
+
+    params_json = request.data['payload']
+    params = json.loads(params_json)
+    jsonschema.validate(params, payload_schema)
+
+    complete_code = params['code']  # includes standard prolog
+    input_globals_dict = params.get('globals_dict') or {}
+    python_path = params.get('python_path') or []
+    limit_overrides_context = params.get('limit_overrides_context')
+    slug = params.get('slug')
+    unsafely = params.get('unsafely')
+
+    extra_files = request.FILES
+
+    # There's a risk of getting into a loop if e.g. the CMS asks the
+    # LMS to run codejail executions on its behalf, and the LMS is
+    # *also* inadvertently configured to call the LMS (itself).
+    # There's no good reason to have a chain of >2 services passing
+    # codejail requests along, so only allow execution here if we
+    # aren't going to pass it along to someone else.
+    if getattr(settings, 'ENABLE_CODEJAIL_REST_SERVICE', False):
+        raise Exception(
+            "Refusing to run codejail request from over the network "
+            "when we're going to pass it to another IDA anyway"
+        )
+
+    # Far too dangerous to allow unsafe executions to come in over the
+    # network, no matter who we think the caller is. The caller is the
+    # one who has the context on safety.
+    if unsafely:
+        raise Exception("Refusing to run codejail request from over the network with unsafely=true")
+
+    output_globals_dict = deepcopy(input_globals_dict)  # Output dict will be mutated by safe_exec
+    try:
+        safe_exec(
+            complete_code,
+            output_globals_dict,
+            python_path=python_path,
+            extra_files=extra_files,
+            limit_overrides_context=limit_overrides_context,
+            slug=slug,
+        )
+    except SafeExecException as e:
+        log.debug("CodejailService execution failed with: %r", e)
+        return Response({'emsg': f"Code jail execution failed: {e!r}"})
+
+    log.debug("CodejailService execution succeeded, with globals=%r", output_globals_dict)
+    return Response({'globals_dict': output_globals_dict})
diff --git a/requirements/base.in b/requirements/base.in
@@ -4,3 +4,8 @@
 Django             # Web application framework
 edx_django_utils
 django-waffle            # Configuration switches and flags -- used by config_watcher app
+edx-codejail             # Actual codejail library; used by codejail_service app
+djangorestframework      # Used by codejail_service app
+edx-drf-extensions       # Used by codejail_service app
+edx-toggles              # Used by codejail_service app
+jsonschema               # Parse and validate JSON; used by codejail_service app
diff --git a/requirements/base.txt b/requirements/base.txt
@@ -6,40 +6,136 @@
 #
 asgiref==3.7.2
     # via django
+attrs==23.2.0
+    # via
+    #   jsonschema
+    #   referencing
+certifi==2023.11.17
+    # via requests
 cffi==1.16.0
-    # via pynacl
+    # via
+    #   cryptography
+    #   pynacl
+charset-normalizer==3.3.2
+    # via requests
 click==8.1.7
-    # via edx-django-utils
+    # via
+    #   code-annotations
+    #   edx-django-utils
+code-annotations==1.5.0
+    # via edx-toggles
+cryptography==41.0.7
+    # via pyjwt
 django==3.2.23
     # via
     #   -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt
     #   -r requirements/base.in
     #   django-crum
     #   django-waffle
+    #   djangorestframework
+    #   drf-jwt
     #   edx-django-utils
+    #   edx-drf-extensions
+    #   edx-toggles
 django-crum==0.7.9
-    # via edx-django-utils
+    # via
+    #   edx-django-utils
+    #   edx-toggles
 django-waffle==4.1.0
     # via
     #   -r requirements/base.in
     #   edx-django-utils
+    #   edx-drf-extensions
+    #   edx-toggles
+djangorestframework==3.14.0
+    # via
+    #   -r requirements/base.in
+    #   drf-jwt
+    #   edx-drf-extensions
+drf-jwt==1.19.2
+    # via edx-drf-extensions
+edx-codejail==3.3.3
+    # via -r requirements/base.in
 edx-django-utils==5.9.0
+    # via
+    #   -r requirements/base.in
+    #   edx-drf-extensions
+    #   edx-toggles
+edx-drf-extensions==9.0.1
     # via -r requirements/base.in
+edx-opaque-keys==2.5.1
+    # via edx-drf-extensions
+edx-toggles==5.1.0
+    # via -r requirements/base.in
+idna==3.6
+    # via requests
+importlib-resources==6.1.1
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+jinja2==3.1.2
+    # via code-annotations
+jsonschema==4.20.0
+    # via -r requirements/base.in
+jsonschema-specifications==2023.12.1
+    # via jsonschema
+markupsafe==2.1.3
+    # via jinja2
 newrelic==9.3.0
     # via edx-django-utils
 pbr==6.0.0
     # via stevedore
-psutil==5.9.6
+pkgutil-resolve-name==1.3.10
+    # via jsonschema
+psutil==5.9.7
     # via edx-django-utils
 pycparser==2.21
     # via cffi
+pyjwt[crypto]==2.8.0
+    # via
+    #   drf-jwt
+    #   edx-drf-extensions
+    #   pyjwt
+pymongo==3.13.0
+    # via edx-opaque-keys
 pynacl==1.5.0
     # via edx-django-utils
+python-slugify==8.0.1
+    # via code-annotations
 pytz==2023.3.post1
-    # via django
+    # via
+    #   django
+    #   djangorestframework
+pyyaml==6.0.1
+    # via code-annotations
+referencing==0.32.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+requests==2.31.0
+    # via edx-drf-extensions
+rpds-py==0.16.2
+    # via
+    #   jsonschema
+    #   referencing
+semantic-version==2.10.0
+    # via edx-drf-extensions
+six==1.16.0
+    # via edx-codejail
 sqlparse==0.4.4
     # via django
 stevedore==5.1.0
-    # via edx-django-utils
+    # via
+    #   code-annotations
+    #   edx-django-utils
+    #   edx-opaque-keys
+text-unidecode==1.3
+    # via python-slugify
 typing-extensions==4.9.0
-    # via asgiref
+    # via
+    #   asgiref
+    #   edx-opaque-keys
+urllib3==2.1.0
+    # via requests
+zipp==3.17.0
+    # via importlib-resources