From 48cdbce8930d58c43b0ca5c5dce80f321b762b3a Mon Sep 17 00:00:00 2001 From: "qasim.gulzar" Date: Mon, 25 Mar 2024 10:28:24 +0500 Subject: [PATCH] refactor: [DEPR]: Neo4J Support #34342 --- .github/workflows/unit-test-shards.json | 1 - .../contentstore/signals/handlers.py | 7 - cms/djangoapps/coursegraph/README.rst | 120 ---- cms/djangoapps/coursegraph/__init__.py | 0 cms/djangoapps/coursegraph/admin.py | 123 ---- cms/djangoapps/coursegraph/apps.py | 25 - .../coursegraph/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../management/commands/dump_to_neo4j.py | 114 ---- .../management/commands/tests/__init__.py | 0 .../commands/tests/test_dump_to_neo4j.py | 596 ------------------ .../management/commands/tests/utils.py | 123 ---- cms/djangoapps/coursegraph/models.py | 21 - cms/djangoapps/coursegraph/tasks.py | 420 ------------ cms/djangoapps/coursegraph/tests/__init__.py | 0 .../coursegraph/tests/test_admin.py | 227 ------- cms/envs/common.py | 37 -- cms/envs/devstack.py | 11 - cms/envs/production.py | 5 - setup.cfg | 2 +- 20 files changed, 1 insertion(+), 1831 deletions(-) delete mode 100644 cms/djangoapps/coursegraph/README.rst delete mode 100644 cms/djangoapps/coursegraph/__init__.py delete mode 100644 cms/djangoapps/coursegraph/admin.py delete mode 100644 cms/djangoapps/coursegraph/apps.py delete mode 100644 cms/djangoapps/coursegraph/management/__init__.py delete mode 100644 cms/djangoapps/coursegraph/management/commands/__init__.py delete mode 100644 cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py delete mode 100644 cms/djangoapps/coursegraph/management/commands/tests/__init__.py delete mode 100644 cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py delete mode 100644 cms/djangoapps/coursegraph/management/commands/tests/utils.py delete mode 100644 cms/djangoapps/coursegraph/models.py delete mode 100644 cms/djangoapps/coursegraph/tasks.py delete mode 100644 cms/djangoapps/coursegraph/tests/__init__.py delete mode 100644 cms/djangoapps/coursegraph/tests/test_admin.py diff --git a/.github/workflows/unit-test-shards.json b/.github/workflows/unit-test-shards.json index 3afd691daf58..3af8c64898f8 100644 --- a/.github/workflows/unit-test-shards.json +++ b/.github/workflows/unit-test-shards.json @@ -239,7 +239,6 @@ "paths": [ "cms/djangoapps/api/", "cms/djangoapps/cms_user_tasks/", - "cms/djangoapps/coursegraph/", "cms/djangoapps/course_creators/", "cms/djangoapps/export_course_metadata/", "cms/djangoapps/maintenance/", diff --git a/cms/djangoapps/contentstore/signals/handlers.py b/cms/djangoapps/contentstore/signals/handlers.py index e0bc9fcc9558..d756424bccaa 100644 --- a/cms/djangoapps/contentstore/signals/handlers.py +++ b/cms/djangoapps/contentstore/signals/handlers.py @@ -123,9 +123,6 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable= update_search_index, update_special_exams_and_publish ) - from cms.djangoapps.coursegraph.tasks import ( - dump_course_to_neo4j - ) # DEVELOPER README: probably all tasks here should use transaction.on_commit # to avoid stale data, but the tasks are owned by many teams and are often @@ -146,10 +143,6 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable= # Push the course outline to learning_sequences asynchronously. update_outline_from_modulestore_task.delay(course_key_str) - if settings.COURSEGRAPH_DUMP_COURSE_ON_PUBLISH: - # Push the course out to CourseGraph asynchronously. - dump_course_to_neo4j.delay(course_key_str) - # Kick off a courseware indexing action after the data is ready if CoursewareSearchIndexer.indexing_is_enabled() and CourseAboutSearchIndexer.indexing_is_enabled(): transaction.on_commit(lambda: update_search_index.delay(course_key_str, datetime.now(UTC).isoformat())) diff --git a/cms/djangoapps/coursegraph/README.rst b/cms/djangoapps/coursegraph/README.rst deleted file mode 100644 index 420eb8326e54..000000000000 --- a/cms/djangoapps/coursegraph/README.rst +++ /dev/null @@ -1,120 +0,0 @@ - -CourseGraph Support -------------------- - -This app exists to write data to "CourseGraph", a tool enabling Open edX developers and support specialists to inspect their platform instance's learning content. CourseGraph itself is simply an instance of `Neo4j`_, which is an open-source graph database with a Web interface. - -.. _Neo4j: https://neo4j.com - -Deploying Coursegraph -===================== - -There are two ways to deploy CourseGraph: - -* For operators using Tutor, there is a `CourseGraph plugin for Tutor`_ that is currently released as "Beta". Nutmeg is the earliest Open edX release that the plugin will work alongside. - -* For operators still using the old Ansible installation pathway, there exists a `neo4j Ansible playbook`_. Be warned that this method is not well-documented nor officially supported. - -In order for CourseGraph to have queryable, up-to-date data, learning content from CMS must be written to CourseGraph regularly. That is where this Django app comes into play. For details on the various ways to write CMS data to CourseGraph, visit the `operations section of the CourseGraph Tutor plugin docs`_. - -**Please note**: Access to a populated CourseGraph instance confers access to all the learning content in the associated Open edX CMS (Studio). The basic authentication provided by Neo4j may or may not be sufficient for your security needs. Consider taking additional security measures, such as restricting CourseGraph access to only users on a private VPN. - -.. _neo4j Ansible playbook: https://github.com/openedx/configuration/blob/master/playbooks/neo4j.yml - -.. _CourseGraph plugin for Tutor: https://github.com/openedx/tutor-contrib-coursegraph/ - -.. _operations section of the CourseGraph Tutor plugin docs: https://github.com/openedx/tutor-contrib-coursegraph/#managing-data - -Running CourseGraph locally -=========================== - -In some circumstances, you may want to run CourseGraph locally, connected to a development-mode Open edX instance. You can do this in both Tutor and Devstack. - -Tutor -***** - -The `CourseGraph plugin for Tutor`_ makes it easy to install, configure, and run CourseGraph for local development. - -Devstack -******** - -CourseGraph is included as an "extra" component in the `Open edX Devstack`_. That is, it is not run or provisioned by default, but can be enabled on-demand. - -To provision Devstack CourseGraph with data from Devstack LMS, run:: - - make dev.provision.coursegraph - -CourseGraph should now be accessible at http://localhost:7474 with the username ``neo4j`` and the password ``edx``. - -Under the hood, the provisioning command just invokes ``dump_to_neo4j`` on your LMS, pointed at your CourseGraph. The provisioning command can be run again at any point in the future to refresh CourseGraph with new LMS data. The data in CourseGraph will persist unless you explicitly destroy it (as noted below). - -Other Devstack CourseGraph commands include:: - - make dev.up.coursegraph # Bring up the container (without re-provisioning). - make dev.down.coursegraph # Stop and remove the container. - make dev.shell.coursegraph # Start a shell session in the container. - make dev.attach.coursegraph # Attach to the container. - make dev.destroy.coursegraph # Stop the container and destroy its database. - -The above commands should be run in your ``devstack`` folder, and they assume that LMS is already properly provisioned. See the `Devstack interface`_ for more details. - -.. _Open edX Devstack: https://github.com/openedx/devstack/ -.. _Devstack interface: https://edx.readthedocs.io/projects/open-edx-devstack/en/latest/devstack_interface.html - - -Querying Coursegraph -==================== - -CourseGraph is queryable using the `Cypher`_ query language. Open edX learning content is represented in Neo4j using a straightforward scheme: - -* A node is an XBlock usage. - -* Nodes are tagged with their ``block_type``, such as: - - * ``course`` - * ``chapter`` - * ``sequential`` - * ``vertical`` - * ``problem`` - * ``html`` - * etc. - -* Every node is also tagged with ``item``. - -* Parent-child relationships in the course hierarchy are reflected in the ``PARENT_OF`` relationship. - -* Ordered sibling relationships in the course hierarchy are reflected in the ``PRECEDES`` relationship. - -* Fields on each XBlock usage (``.display_name``, ``.data``, etc) are available on the corresponding node. - -.. _Cypher: https://neo4j.com/developer/cypher/ - - -Example Queries -*************** - -How many XBlocks exist in the LMS, by type? :: - - MATCH - (c:course) -[:PARENT_OF*]-> (n:item) - RETURN - distinct(n.block_type) as block_type, - count(n) as number - order by - number DESC - - -In a given course, which units contain problems with custom Python grading code? :: - - MATCH - (c:course) -[:PARENT_OF*]-> (u:vertical) -[:PARENT_OF*]-> (p:problem) - WHERE - p.data CONTAINS 'loncapa/python' - AND - c.course_key = '' - RETURN - u.location - -You can see many more examples of useful CourseGraph queries on the `query archive wiki page`_. - -.. _query archive wiki page: https://openedx.atlassian.net/wiki/spaces/COMM/pages/3273228388/Useful+CourseGraph+Queries diff --git a/cms/djangoapps/coursegraph/__init__.py b/cms/djangoapps/coursegraph/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/cms/djangoapps/coursegraph/admin.py b/cms/djangoapps/coursegraph/admin.py deleted file mode 100644 index f79fa909d286..000000000000 --- a/cms/djangoapps/coursegraph/admin.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Admin site bindings for coursegraph -""" -import logging - -from django.contrib import admin, messages -from django.utils.translation import gettext as _ -from edx_django_utils.admin.mixins import ReadOnlyAdminMixin - -from .models import CourseGraphCourseDump -from .tasks import ModuleStoreSerializer - -log = logging.getLogger(__name__) - - -@admin.action( - permissions=['change'], - description=_("Dump courses to CourseGraph (respect cache)"), -) -def dump_courses(modeladmin, request, queryset): - """ - Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses, - excluding courses that haven't been published since they were last dumped. - - queryset is a QuerySet of CourseGraphCourseDump objects, which are just - CourseOverview objects under the hood. - """ - all_course_keys = queryset.values_list('id', flat=True) - serializer = ModuleStoreSerializer(all_course_keys) - try: - submitted, skipped = serializer.dump_courses_to_neo4j() - # Unfortunately there is no unified base class for the reasonable - # exceptions we could expect from py2neo (connection unavailable, bolt protocol - # error, and so on), so we just catch broadly, show a generic error banner, - # and then log the exception for site operators to look at. - except Exception as err: # pylint: disable=broad-except - log.exception( - "Failed to enqueue CourseGraph dumps to Neo4j (respecting cache): %s", - ", ".join(str(course_key) for course_key in all_course_keys), - ) - modeladmin.message_user( - request, - _("Error enqueueing dumps for {} course(s): {}").format( - len(all_course_keys), str(err) - ), - level=messages.ERROR, - ) - return - if submitted: - modeladmin.message_user( - request, - _( - "Enqueued dumps for {} course(s). Skipped {} unchanged course(s)." - ).format(len(submitted), len(skipped)), - level=messages.SUCCESS, - ) - else: - modeladmin.message_user( - request, - _( - "Skipped all {} course(s), as they were unchanged.", - ).format(len(skipped)), - level=messages.WARNING, - ) - - -@admin.action( - permissions=['change'], - description=_("Dump courses to CourseGraph (override cache)") -) -def dump_courses_overriding_cache(modeladmin, request, queryset): - """ - Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses - (whether or not they have been published recently). - - queryset is a QuerySet of CourseGraphCourseDump objects, which are just - CourseOverview objects under the hood. - """ - all_course_keys = queryset.values_list('id', flat=True) - serializer = ModuleStoreSerializer(all_course_keys) - try: - submitted, _skipped = serializer.dump_courses_to_neo4j(override_cache=True) - # Unfortunately there is no unified base class for the reasonable - # exceptions we could expect from py2neo (connection unavailable, bolt protocol - # error, and so on), so we just catch broadly, show a generic error banner, - # and then log the exception for site operators to look at. - except Exception as err: # pylint: disable=broad-except - log.exception( - "Failed to enqueue CourseGraph Neo4j course dumps (overriding cache): %s", - ", ".join(str(course_key) for course_key in all_course_keys), - ) - modeladmin.message_user( - request, - _("Error enqueueing dumps for {} course(s): {}").format( - len(all_course_keys), str(err) - ), - level=messages.ERROR, - ) - return - modeladmin.message_user( - request, - _("Enqueued dumps for {} course(s).").format(len(submitted)), - level=messages.SUCCESS, - ) - - -@admin.register(CourseGraphCourseDump) -class CourseGraphCourseDumpAdmin(ReadOnlyAdminMixin, admin.ModelAdmin): - """ - Model admin for "Course graph course dumps". - - Just a read-only table with some useful metadata, allowing admin users to - select courses to be dumped to CourseGraph. - """ - list_display = [ - 'id', - 'display_name', - 'modified', - 'enrollment_start', - 'enrollment_end', - ] - search_fields = ['id', 'display_name'] - actions = [dump_courses, dump_courses_overriding_cache] diff --git a/cms/djangoapps/coursegraph/apps.py b/cms/djangoapps/coursegraph/apps.py deleted file mode 100644 index 71ae91ad493e..000000000000 --- a/cms/djangoapps/coursegraph/apps.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Coursegraph Application Configuration - -Signal handlers are connected here. -""" -import warnings - -from django.apps import AppConfig - - -class CoursegraphConfig(AppConfig): - """ - AppConfig for courseware app - """ - name = 'cms.djangoapps.coursegraph' - - from cms.djangoapps.coursegraph import tasks - - def ready(self) -> None: - warnings.warn( - "Neo4j support is going to be dropped after Sumac release," - "to read more here is a github issue https://github.com/openedx/edx-platform/issues/34342", - DeprecationWarning, - stacklevel=2 - ) diff --git a/cms/djangoapps/coursegraph/management/__init__.py b/cms/djangoapps/coursegraph/management/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/cms/djangoapps/coursegraph/management/commands/__init__.py b/cms/djangoapps/coursegraph/management/commands/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py deleted file mode 100644 index 40afe7ffbe7d..000000000000 --- a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -This file contains a management command for exporting the modulestore to -Neo4j, a graph database. - -Example usages: - - # Dump all courses published since last dump. - # Use connection parameters from `settings.COURSEGRAPH_SETTINGS`. - python manage.py cms dump_to_neo4j - - # Dump all courses published since last dump. - # Use custom connection parameters. - python manage.py cms dump_to_neo4j --host localhost --port 7473 \ - --secure --user user --password password - - # Specify certain courses instead of dumping all of them. - # Use connection parameters from `settings.COURSEGRAPH_SETTINGS`. - python manage.py cms dump_to_neo4j --courses 'course-v1:A+B+1' 'course-v1:A+B+2' -""" - - -import logging -from textwrap import dedent - -from django.core.management.base import BaseCommand - -from cms.djangoapps.coursegraph.tasks import ModuleStoreSerializer - -log = logging.getLogger(__name__) - - -class Command(BaseCommand): - """ - Dump recently-published course(s) over to a CourseGraph (Neo4j) instance. - """ - help = dedent(__doc__).strip() - - def add_arguments(self, parser): - parser.add_argument( - '--host', - type=str, - help="the hostname of the Neo4j server", - ) - parser.add_argument( - '--port', - type=int, - help="the port on the Neo4j server that accepts Bolt requests", - ) - parser.add_argument( - '--secure', - action='store_true', - help="connect to server over Bolt/TLS instead of plain unencrypted Bolt", - ) - parser.add_argument( - '--user', - type=str, - help="the username of the Neo4j user", - ) - parser.add_argument( - '--password', - type=str, - help="the password of the Neo4j user", - ) - parser.add_argument( - '--courses', - metavar='KEY', - type=str, - nargs='*', - help="keys of courses to serialize; if omitted all courses in system are serialized", - ) - parser.add_argument( - '--skip', - metavar='KEY', - type=str, - nargs='*', - help="keys of courses to NOT to serialize", - ) - parser.add_argument( - '--override', - action='store_true', - help="dump all courses regardless of when they were last published", - ) - - def handle(self, *args, **options): - """ - Iterates through each course, serializes them into graphs, and saves - those graphs to neo4j. - """ - - mss = ModuleStoreSerializer.create(options['courses'], options['skip']) - connection_overrides = { - key: options[key] - for key in ["host", "port", "secure", "user", "password"] - } - submitted_courses, skipped_courses = mss.dump_courses_to_neo4j( - connection_overrides=connection_overrides, - override_cache=options['override'], - ) - - log.info( - "%d courses submitted for export to neo4j. %d courses skipped.", - len(submitted_courses), - len(skipped_courses), - ) - - if not submitted_courses: - print("No courses submitted for export to neo4j at all!") - return - - if submitted_courses: - print( - "These courses were submitted for export to neo4j successfully:\n\t" + - "\n\t".join(submitted_courses) - ) diff --git a/cms/djangoapps/coursegraph/management/commands/tests/__init__.py b/cms/djangoapps/coursegraph/management/commands/tests/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py deleted file mode 100644 index 24595098d3bc..000000000000 --- a/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ /dev/null @@ -1,596 +0,0 @@ -""" -Tests for the dump_to_neo4j management command. -""" - - -from datetime import datetime - -from unittest import mock -import ddt -from django.core.management import call_command -from django.test.utils import override_settings -from edx_toggles.toggles.testutils import override_waffle_switch -from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase -from xmodule.modulestore.tests.factories import CourseFactory, BlockFactory - -import openedx.core.djangoapps.content.block_structure.config as block_structure_config -from openedx.core.djangoapps.content.block_structure.signals import update_block_structure_on_course_publish -from cms.djangoapps.coursegraph.management.commands.dump_to_neo4j import ModuleStoreSerializer -from cms.djangoapps.coursegraph.management.commands.tests.utils import MockGraph, MockNodeMatcher -from cms.djangoapps.coursegraph.tasks import ( - coerce_types, - serialize_course, - serialize_item, - should_dump_course, - strip_branch_and_version -) -from openedx.core.djangolib.testing.utils import skip_unless_lms - - -class TestDumpToNeo4jCommandBase(SharedModuleStoreTestCase): - """ - Base class for the test suites in this file. Sets up a couple courses. - """ - @classmethod - def setUpClass(cls): - r""" - Creates two courses; one that's just a course block, and one that - looks like: - course - | - chapter - | - sequential - | - vertical - / | \ \ - / | \ ---------- - / | \ \ - / | --- \ - / | \ \ - html -> problem -> video -> video2 - - The side-pointing arrows (->) are PRECEDES relationships; the more - vertical lines are PARENT_OF relationships. - - The vertical in this course and the first video have the same - display_name, so that their block_ids are the same. This is to - test for a bug where xblocks with the same block_ids (but different - locations) pointed to themselves erroneously. - """ - super().setUpClass() - cls.course = CourseFactory.create() - cls.chapter = BlockFactory.create(parent=cls.course, category='chapter') - cls.sequential = BlockFactory.create(parent=cls.chapter, category='sequential') - cls.vertical = BlockFactory.create(parent=cls.sequential, category='vertical', display_name='subject') - cls.html = BlockFactory.create(parent=cls.vertical, category='html') - cls.problem = BlockFactory.create(parent=cls.vertical, category='problem') - cls.video = BlockFactory.create(parent=cls.vertical, category='video', display_name='subject') - cls.video2 = BlockFactory.create(parent=cls.vertical, category='video') - - cls.course2 = CourseFactory.create() - - cls.course_strings = [str(cls.course.id), str(cls.course2.id)] - - @staticmethod - def setup_mock_graph(mock_matcher_class, mock_graph_class, transaction_errors=False): - """ - Replaces the py2neo Graph object with a MockGraph; similarly replaces - NodeMatcher with MockNodeMatcher. - - Arguments: - mock_matcher_class: a mocked NodeMatcher class - mock_graph_class: a mocked Graph class - transaction_errors: a bool for whether we should get errors - when transactions try to commit - - Returns: an instance of MockGraph - """ - - mock_graph = MockGraph(transaction_errors=transaction_errors) - mock_graph_class.return_value = mock_graph - - mock_node_matcher = MockNodeMatcher(mock_graph) - mock_matcher_class.return_value = mock_node_matcher - return mock_graph - - def assertCourseDump(self, mock_graph, number_of_courses, number_commits, number_rollbacks): - """ - Asserts that we have the expected number of courses, commits, and - rollbacks after we dump the modulestore to neo4j - Arguments: - mock_graph: a MockGraph backend - number_of_courses: number of courses we expect to find - number_commits: number of commits we expect against the graph - number_rollbacks: number of commit rollbacks we expect - """ - courses = {node['course_key'] for node in mock_graph.nodes} - assert len(courses) == number_of_courses - assert mock_graph.number_commits == number_commits - assert mock_graph.number_rollbacks == number_rollbacks - - -@ddt.ddt -class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): - """ - Tests for the dump to neo4j management command - """ - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') - @ddt.data(1, 2) - def test_dump_specific_courses(self, number_of_courses, mock_graph_class, mock_matcher_class): - """ - Test that you can specify which courses you want to dump. - """ - mock_graph = self.setup_mock_graph(mock_matcher_class, mock_graph_class) - - call_command( - 'dump_to_neo4j', - courses=self.course_strings[:number_of_courses], - host='mock_host', - port=7687, - user='mock_user', - password='mock_password', - ) - - self.assertCourseDump( - mock_graph, - number_of_courses=number_of_courses, - number_commits=number_of_courses, - number_rollbacks=0 - ) - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') - def test_dump_skip_course(self, mock_graph_class, mock_matcher_class): - """ - Test that you can skip courses. - """ - mock_graph = self.setup_mock_graph( - mock_matcher_class, mock_graph_class - ) - - call_command( - 'dump_to_neo4j', - skip=self.course_strings[:1], - host='mock_host', - port=7687, - user='mock_user', - password='mock_password', - ) - - self.assertCourseDump( - mock_graph, - number_of_courses=1, - number_commits=1, - number_rollbacks=0, - ) - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') - def test_dump_skip_beats_specifying(self, mock_graph_class, mock_matcher_class): - """ - Test that if you skip and specify the same course, you'll skip it. - """ - mock_graph = self.setup_mock_graph( - mock_matcher_class, mock_graph_class - ) - - call_command( - 'dump_to_neo4j', - skip=self.course_strings[:1], - courses=self.course_strings[:1], - host='mock_host', - port=7687, - user='mock_user', - password='mock_password', - ) - - self.assertCourseDump( - mock_graph, - number_of_courses=0, - number_commits=0, - number_rollbacks=0, - ) - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') - def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): - """ - Test if you don't specify which courses to dump, then you'll dump - all of them. - """ - mock_graph = self.setup_mock_graph( - mock_matcher_class, mock_graph_class - ) - - call_command( - 'dump_to_neo4j', - host='mock_host', - port=7687, - user='mock_user', - password='mock_password' - ) - - self.assertCourseDump( - mock_graph, - number_of_courses=2, - number_commits=2, - number_rollbacks=0, - ) - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.Graph', autospec=True) - @override_settings( - COURSEGRAPH_CONNECTION=dict( - protocol='bolt', - host='coursegraph.example.edu', - port=7777, - secure=True, - user="neo4j", - password="default-password", - ) - ) - def test_dump_to_neo4j_connection_defaults(self, mock_graph_class, mock_matcher_class): - """ - Test that user can override individual settings.COURSEGRAPH_CONNECTION parameters - by passing them to `dump_to_neo4j`, whilst falling back to the ones that they - don't override. - """ - self.setup_mock_graph( - mock_matcher_class, mock_graph_class - ) - call_command( - 'dump_to_neo4j', - courses=self.course_strings[:1], - port=7788, - secure=False, - password="overridden-password", - ) - assert mock_graph_class.call_args.args == () - assert mock_graph_class.call_args.kwargs == dict( - - # From settings: - protocol='bolt', - host='coursegraph.example.edu', - user="neo4j", - - # Overriden by command: - port=7788, - secure=False, - password="overridden-password", - ) - - -class SomeThing: - """Just to test the stringification of an object.""" - def __str__(self): - return "" - - -@skip_unless_lms -@ddt.ddt -class TestModuleStoreSerializer(TestDumpToNeo4jCommandBase): - """ - Tests for the ModuleStoreSerializer - """ - @classmethod - def setUpClass(cls): - """Any ModuleStore course/content operations can go here.""" - super().setUpClass() - cls.mss = ModuleStoreSerializer.create() - - def test_serialize_item(self): - """ - Tests the serialize_item method. - """ - fields, label = serialize_item(self.course) - assert label == 'course' - assert 'edited_on' in list(fields.keys()) - assert 'display_name' in list(fields.keys()) - assert 'org' in list(fields.keys()) - assert 'course' in list(fields.keys()) - assert 'run' in list(fields.keys()) - assert 'course_key' in list(fields.keys()) - assert 'location' in list(fields.keys()) - assert 'block_type' in list(fields.keys()) - assert 'detached' in list(fields.keys()) - assert 'checklist' not in list(fields.keys()) - - def test_serialize_course(self): - """ - Tests the serialize_course method. - """ - nodes, relationships = serialize_course(self.course.id) - assert len(nodes) == 9 - # the course has 7 "PARENT_OF" relationships and 3 "PRECEDES" - assert len(relationships) == 10 - - def test_strip_version_and_branch(self): - """ - Tests that the _strip_version_and_branch function strips the version - and branch from a location - """ - location = self.course.id.make_usage_key( - 'test_block_type', 'test_block_id' - ).for_branch( - 'test_branch' - ).for_version(b'test_version') - - assert location.branch is not None - assert location.version_guid is not None - - stripped_location = strip_branch_and_version(location) - - assert stripped_location.branch is None - assert stripped_location.version_guid is None - - @staticmethod - def _extract_relationship_pairs(relationships, relationship_type): - """ - Extracts a list of XBlock location tuples from a list of Relationships. - - Arguments: - relationships: list of py2neo `Relationship` objects - relationship_type: the type of relationship to filter `relationships` - by. - Returns: - List of tuples of the locations of of the relationships' - constituent nodes. - """ - relationship_pairs = [ - (rel.start_node["location"], rel.end_node["location"]) - for rel in relationships if type(rel).__name__ == relationship_type - ] - return relationship_pairs - - @staticmethod - def _extract_location_pair(xblock1, xblock2): - """ - Returns a tuple of locations from two XBlocks. - - Arguments: - xblock1: an xblock - xblock2: also an xblock - - Returns: - A tuple of the string representations of those XBlocks' locations. - """ - return (str(xblock1.location), str(xblock2.location)) - - def assertBlockPairIsRelationship(self, xblock1, xblock2, relationships, relationship_type): - """ - Helper assertion that a pair of xblocks have a certain kind of - relationship with one another. - """ - relationship_pairs = self._extract_relationship_pairs(relationships, relationship_type) - location_pair = self._extract_location_pair(xblock1, xblock2) - assert location_pair in relationship_pairs - - def assertBlockPairIsNotRelationship(self, xblock1, xblock2, relationships, relationship_type): - """ - The opposite of `assertBlockPairIsRelationship`: asserts that a pair - of xblocks do NOT have a certain kind of relationship. - """ - relationship_pairs = self._extract_relationship_pairs(relationships, relationship_type) - location_pair = self._extract_location_pair(xblock1, xblock2) - assert location_pair not in relationship_pairs - - def test_precedes_relationship(self): - """ - Tests that two nodes that should have a precedes relationship have it. - """ - __, relationships = serialize_course(self.course.id) - self.assertBlockPairIsRelationship(self.video, self.video2, relationships, "PRECEDES") - self.assertBlockPairIsNotRelationship(self.video2, self.video, relationships, "PRECEDES") - self.assertBlockPairIsNotRelationship(self.vertical, self.video, relationships, "PRECEDES") - self.assertBlockPairIsNotRelationship(self.html, self.video, relationships, "PRECEDES") - - def test_parent_relationship(self): - """ - Test that two nodes that should have a parent_of relationship have it. - """ - __, relationships = serialize_course(self.course.id) - self.assertBlockPairIsRelationship(self.vertical, self.video, relationships, "PARENT_OF") - self.assertBlockPairIsRelationship(self.vertical, self.html, relationships, "PARENT_OF") - self.assertBlockPairIsRelationship(self.course, self.chapter, relationships, "PARENT_OF") - self.assertBlockPairIsNotRelationship(self.course, self.video, relationships, "PARENT_OF") - self.assertBlockPairIsNotRelationship(self.video, self.vertical, relationships, "PARENT_OF") - self.assertBlockPairIsNotRelationship(self.video, self.html, relationships, "PARENT_OF") - - def test_nodes_have_indices(self): - """ - Test that we add index values on nodes - """ - nodes, relationships = serialize_course(self.course.id) # lint-amnesty, pylint: disable=unused-variable - - # the html node should have 0 index, and the problem should have 1 - html_nodes = [node for node in nodes if node['block_type'] == 'html'] - assert len(html_nodes) == 1 - problem_nodes = [node for node in nodes if node['block_type'] == 'problem'] - assert len(problem_nodes) == 1 - html_node = html_nodes[0] - problem_node = problem_nodes[0] - - assert html_node['index'] == 0 - assert problem_node['index'] == 1 - - @ddt.data( - (1, 1), - (SomeThing(), ""), - (1.5, 1.5), - ("úñîçø∂é", "úñîçø∂é"), - (b"plain string", b"plain string"), - (True, True), - (None, "None"), - ((1,), "(1,)"), - # list of elements should be coerced into a list of the - # string representations of those elements - ([SomeThing(), SomeThing()], ["", ""]), - ([1, 2], ["1", "2"]), - ) - @ddt.unpack - def test_coerce_types(self, original_value, coerced_expected): - """ - Tests the coerce_types helper - """ - coerced_value = coerce_types(original_value) - assert coerced_value == coerced_expected - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') - def test_dump_to_neo4j(self, mock_graph_constructor, mock_matcher_class): - """ - Tests the dump_to_neo4j method works against a mock - py2neo Graph - """ - mock_graph = MockGraph() - mock_graph_constructor.return_value = mock_graph - mock_matcher_class.return_value = MockNodeMatcher(mock_graph) - # mocking is thorwing error in kombu serialzier and its not require here any more. - credentials = {} - - submitted, skipped = self.mss.dump_courses_to_neo4j(credentials) # lint-amnesty, pylint: disable=unused-variable - - self.assertCourseDump( - mock_graph, - number_of_courses=2, - number_commits=2, - number_rollbacks=0, - ) - - # 9 nodes + 7 relationships from the first course - # 2 nodes and no relationships from the second - - assert len(mock_graph.nodes) == 11 - self.assertCountEqual(submitted, self.course_strings) - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') - def test_dump_to_neo4j_rollback(self, mock_graph_constructor, mock_matcher_class): - """ - Tests that the the dump_to_neo4j method handles the case where there's - an exception trying to write to the neo4j database. - """ - mock_graph = MockGraph(transaction_errors=True) - mock_graph_constructor.return_value = mock_graph - mock_matcher_class.return_value = MockNodeMatcher(mock_graph) - # mocking is thorwing error in kombu serialzier and its not require here any more. - credentials = {} - - submitted, skipped = self.mss.dump_courses_to_neo4j(credentials) # lint-amnesty, pylint: disable=unused-variable - - self.assertCourseDump( - mock_graph, - number_of_courses=0, - number_commits=0, - number_rollbacks=2, - ) - - self.assertCountEqual(submitted, self.course_strings) - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') - @ddt.data((True, 2), (False, 0)) - @ddt.unpack - def test_dump_to_neo4j_cache( - self, - override_cache, - expected_number_courses, - mock_graph_constructor, - mock_matcher_class, - ): - """ - Tests the caching mechanism and override to make sure we only publish - recently updated courses. - """ - mock_graph = MockGraph() - mock_graph_constructor.return_value = mock_graph - mock_matcher_class.return_value = MockNodeMatcher(mock_graph) - # mocking is thorwing error in kombu serialzier and its not require here any more. - credentials = {} - - # run once to warm the cache - self.mss.dump_courses_to_neo4j( - credentials, override_cache=override_cache - ) - - # when run the second time, only dump courses if the cache override - # is enabled - submitted, __ = self.mss.dump_courses_to_neo4j( - credentials, override_cache=override_cache - ) - assert len(submitted) == expected_number_courses - - @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') - def test_dump_to_neo4j_published(self, mock_graph_constructor, mock_matcher_class): - """ - Tests that we only dump those courses that have been published after - the last time the command was been run. - """ - mock_graph = MockGraph() - mock_graph_constructor.return_value = mock_graph - mock_matcher_class.return_value = MockNodeMatcher(mock_graph) - # mocking is thorwing error in kombu serialzier and its not require here any more. - credentials = {} - - # run once to warm the cache - submitted, skipped = self.mss.dump_courses_to_neo4j(credentials) # lint-amnesty, pylint: disable=unused-variable - assert len(submitted) == len(self.course_strings) - - # simulate one of the courses being published - with override_waffle_switch(block_structure_config.STORAGE_BACKING_FOR_CACHE, True): - update_block_structure_on_course_publish(None, self.course.id) - - # make sure only the published course was dumped - submitted, __ = self.mss.dump_courses_to_neo4j(credentials) - assert len(submitted) == 1 - assert submitted[0] == str(self.course.id) - - @mock.patch('cms.djangoapps.coursegraph.tasks.get_course_last_published') - @mock.patch('cms.djangoapps.coursegraph.tasks.get_command_last_run') - @ddt.data( - ( - str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), - (True, ( - 'course has been published since last neo4j update time - ' - 'update date 2016-03-30 00:00:00 < published date 2016-03-31 00:00:00' - )) - ), - ( - str(datetime(2016, 3, 31)), str(datetime(2016, 3, 30)), - (False, None) - ), - ( - str(datetime(2016, 3, 31)), None, - (False, None) - ), - ( - None, str(datetime(2016, 3, 30)), - (True, 'no record of the last neo4j update time for the course') - ), - ( - None, None, - (True, 'no record of the last neo4j update time for the course') - ), - ) - @ddt.unpack - def test_should_dump_course( - self, - last_command_run, - last_course_published, - should_dump, - mock_get_command_last_run, - mock_get_course_last_published, - ): - """ - Tests whether a course should be dumped given the last time it was - dumped and the last time it was published. - """ - mock_get_command_last_run.return_value = last_command_run - mock_get_course_last_published.return_value = last_course_published - mock_course_key = mock.Mock() - mock_graph = mock.Mock() - assert should_dump_course(mock_course_key, mock_graph) == should_dump diff --git a/cms/djangoapps/coursegraph/management/commands/tests/utils.py b/cms/djangoapps/coursegraph/management/commands/tests/utils.py deleted file mode 100644 index c1b776b7bfc6..000000000000 --- a/cms/djangoapps/coursegraph/management/commands/tests/utils.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Utilities for testing the dump_to_neo4j management command -""" - - -from py2neo import Node - - -class MockGraph: - """ - A stubbed out version of py2neo's Graph object, used for testing. - Args: - transaction_errors: a bool for whether transactions should throw - an error. - """ - def __init__(self, transaction_errors=False, **kwargs): # pylint: disable=unused-argument - self.nodes = set() - self.number_commits = 0 - self.number_rollbacks = 0 - self.transaction_errors = transaction_errors - - def begin(self): - """ - A stub of the method that generates transactions - Returns: a MockTransaction object (instead of a py2neo Transaction) - """ - return MockTransaction(self) - - def commit(self, transaction): - """ - Takes elements in the mock transaction's temporary storage and adds them - to this mock graph's storage. Throws an error if this graph's - transaction_errors param is set to True. - """ - if self.transaction_errors: - raise Exception("fake exception while trying to commit") - for element in transaction.temp: - self.nodes.add(element) - transaction.temp.clear() - self.number_commits += 1 - - def rollback(self, transaction): - """ - Clears the transactions temporary storage - """ - transaction.temp.clear() - self.number_rollbacks += 1 - - -class MockTransaction: - """ - A stubbed out version of py2neo's Transaction object, used for testing. - """ - def __init__(self, graph): - self.temp = set() - self.graph = graph - - def run(self, query): - """ - Deletes all nodes associated with a course. Normally `run` executes - an arbitrary query, but in our code, we only use it to delete nodes - associated with a course. - Args: - query: query string to be executed (in this case, to delete all - nodes associated with a course) - """ - start_string = "WHERE n.course_key='" - start = query.index(start_string) + len(start_string) - query = query[start:] - end = query.find("'") - course_key = query[:end] - - self.graph.nodes = { - node for node in self.graph.nodes if node['course_key'] != course_key - } - - def create(self, element): - """ - Adds elements to the transaction's temporary backend storage - Args: - element: a py2neo Node object - """ - if isinstance(element, Node): - self.temp.add(element) - - -class MockNodeMatcher: - """ - Mocks out py2neo's NodeMatcher class. Used to match a node from a graph. - py2neo's NodeMatcher expects a real graph object to run queries against, - so, rather than have to mock out MockGraph to accommodate those queries, - it seemed simpler to mock out NodeMatcher as well. - """ - def __init__(self, graph): - self.graph = graph - - def match(self, label, course_key): - """ - Selects nodes that match a label and course_key - Args: - label: the string of the label we're selecting nodes by - course_key: the string of the course key we're selecting node by - - Returns: a MockResult of matching nodes - """ - nodes = [] - for node in self.graph.nodes: - if node.has_label(label) and node["course_key"] == course_key: - nodes.append(node) - return MockNodeMatch(nodes) - - -class MockNodeMatch(list): - """ - Mocks out py2neo's NodeMatch class: this is the type of what - MockNodeMatcher's `match` method returns. - """ - def first(self): - """ - Returns: the first element of a list if the list has elements. - Otherwise, None. - """ - return self[0] if self else None diff --git a/cms/djangoapps/coursegraph/models.py b/cms/djangoapps/coursegraph/models.py deleted file mode 100644 index f053dc9993ac..000000000000 --- a/cms/djangoapps/coursegraph/models.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -(Proxy) models supporting CourseGraph. -""" - -from openedx.core.djangoapps.content.course_overviews.models import CourseOverview - - -class CourseGraphCourseDump(CourseOverview): - """ - Proxy model for CourseOverview. - - Does *not* create/update/delete CourseOverview objects - only reads the objects. - Uses the course IDs of the CourseOverview objects to determine which courses - can be dumped to CourseGraph. - """ - class Meta: - proxy = True - - def __str__(self): - """Represent ourselves with the course key.""" - return str(self.id) diff --git a/cms/djangoapps/coursegraph/tasks.py b/cms/djangoapps/coursegraph/tasks.py deleted file mode 100644 index e2d4bf5b0976..000000000000 --- a/cms/djangoapps/coursegraph/tasks.py +++ /dev/null @@ -1,420 +0,0 @@ -""" -This file contains a management command for exporting the modulestore to -neo4j, a graph database. -""" - - -import logging - -from celery import shared_task -from django.conf import settings -from django.utils import timezone -from edx_django_utils.cache import RequestCache -from edx_django_utils.monitoring import set_code_owner_attribute -from opaque_keys.edx.keys import CourseKey - -import py2neo # pylint: disable=unused-import -from py2neo import Graph, Node, Relationship - -try: - from py2neo.matching import NodeMatcher -except ImportError: - from py2neo import NodeMatcher -else: - pass - - -log = logging.getLogger(__name__) -celery_log = logging.getLogger('edx.celery.task') - -# When testing locally, neo4j's bolt logger was noisy, so we'll only have it -# emit logs if there's an error. -bolt_log = logging.getLogger('neo4j.bolt') # pylint: disable=invalid-name -bolt_log.setLevel(logging.ERROR) - -PRIMITIVE_NEO4J_TYPES = (int, bytes, str, float, bool) - - -def serialize_item(item): - """ - Args: - item: an XBlock - - Returns: - fields: a dictionary of an XBlock's field names and values - block_type: the name of the XBlock's type (i.e. 'course' - or 'problem') - """ - from xmodule.modulestore.store_utilities import DETACHED_XBLOCK_TYPES - - # convert all fields to a dict and filter out parent and children field - fields = { - field: field_value.read_from(item) - for (field, field_value) in item.fields.items() - if field not in ['parent', 'children'] - } - - course_key = item.scope_ids.usage_id.course_key - block_type = item.scope_ids.block_type - - # set or reset some defaults - fields['edited_on'] = str(getattr(item, 'edited_on', '')) - fields['display_name'] = item.display_name_with_default - fields['org'] = course_key.org - fields['course'] = course_key.course - fields['run'] = course_key.run - fields['course_key'] = str(course_key) - fields['location'] = str(item.location) - fields['block_type'] = block_type - fields['detached'] = block_type in DETACHED_XBLOCK_TYPES - - if block_type == 'course': - # prune the checklists field - if 'checklists' in fields: - del fields['checklists'] - - # record the time this command was run - fields['time_last_dumped_to_neo4j'] = str(timezone.now()) - - return fields, block_type - - -def coerce_types(value): - """ - Args: - value: the value of an xblock's field - - Returns: either the value, a text version of the value, or, if the - value is a list, a list where each element is converted to text. - """ - coerced_value = value - if isinstance(value, list): - coerced_value = [str(element) for element in coerced_value] - - # if it's not one of the types that neo4j accepts, - # just convert it to text - elif not isinstance(value, PRIMITIVE_NEO4J_TYPES): - coerced_value = str(value) - - return coerced_value - - -def add_to_transaction(neo4j_entities, transaction): - """ - Args: - neo4j_entities: a list of Nodes or Relationships - transaction: a neo4j transaction - """ - for entity in neo4j_entities: - transaction.create(entity) - - -def get_command_last_run(course_key, graph): - """ - This information is stored on the course node of a course in neo4j - Args: - course_key: a CourseKey - graph: a py2neo Graph - - Returns: The datetime that the command was last run, converted into - text, or None, if there's no record of this command last being run. - """ - matcher = NodeMatcher(graph) - course_node = matcher.match( - "course", - course_key=str(course_key) - ).first() - - last_this_command_was_run = None - if course_node: - last_this_command_was_run = course_node['time_last_dumped_to_neo4j'] - - return last_this_command_was_run - - -def get_course_last_published(course_key): - """ - Approximately when was a course last published? - - We use the 'modified' column in the CourseOverview table as a quick and easy - (although perhaps inexact) way of determining when a course was last - published. This works because CourseOverview rows are re-written upon - course publish. - - Args: - course_key: a CourseKey - - Returns: The datetime the course was last published at, stringified. - Uses Python's default str(...) implementation for datetimes, which - is sortable and similar to ISO 8601: - https://docs.python.org/3/library/datetime.html#datetime.date.__str__ - """ - # Import is placed here to avoid model import at project startup. - from openedx.core.djangoapps.content.course_overviews.models import CourseOverview - - approx_last_published = CourseOverview.get_from_id(course_key).modified - return str(approx_last_published) - - -def strip_branch_and_version(location): - """ - Removes the branch and version information from a location. - Args: - location: an xblock's location. - Returns: that xblock's location without branch and version information. - """ - return location.for_branch(None) - - -def serialize_course(course_id): - """ - Serializes a course into py2neo Nodes and Relationships - Args: - course_id: CourseKey of the course we want to serialize - - Returns: - nodes: a list of py2neo Node objects - relationships: a list of py2neo Relationships objects - """ - # Import is placed here to avoid model import at project startup. - from xmodule.modulestore.django import modulestore - - # create a location to node mapping we'll need later for - # writing relationships - location_to_node = {} - items = modulestore().get_items(course_id) - - # create nodes - for item in items: - fields, block_type = serialize_item(item) - - for field_name, value in fields.items(): - fields[field_name] = coerce_types(value) - - node = Node(block_type, 'item', **fields) - location_to_node[strip_branch_and_version(item.location)] = node - - # create relationships - relationships = [] - for item in items: - previous_child_node = None - for index, child in enumerate(item.get_children()): - parent_node = location_to_node.get(strip_branch_and_version(item.location)) - child_node = location_to_node.get(strip_branch_and_version(child.location)) - - if parent_node is not None and child_node is not None: - child_node["index"] = index - - relationship = Relationship(parent_node, "PARENT_OF", child_node) - relationships.append(relationship) - - if previous_child_node: - ordering_relationship = Relationship( - previous_child_node, - "PRECEDES", - child_node, - ) - relationships.append(ordering_relationship) - previous_child_node = child_node - - nodes = list(location_to_node.values()) - return nodes, relationships - - -def should_dump_course(course_key, graph): - """ - Only dump the course if it's been changed since the last time it's been - dumped. - Args: - course_key: a CourseKey object. - graph: a py2neo Graph object. - - Returns: - - whether this course should be dumped to neo4j (bool) - - reason why course needs to be dumped (string, None if doesn't need to be dumped) - """ - - last_this_command_was_run = get_command_last_run(course_key, graph) - - course_last_published_date = get_course_last_published(course_key) - - # if we don't have a record of the last time this command was run, - # we should serialize the course and dump it - if last_this_command_was_run is None: - return ( - True, - "no record of the last neo4j update time for the course" - ) - - # if we've serialized the course recently and we have no published - # events, we will not dump it, and so we can skip serializing it - # again here - if last_this_command_was_run and course_last_published_date is None: - return (False, None) - - # otherwise, serialize and dump the course if the command was run - # before the course's last published event - needs_update = last_this_command_was_run < course_last_published_date - update_reason = None - if needs_update: - update_reason = ( - f"course has been published since last neo4j update time - " - f"update date {last_this_command_was_run} < published date {course_last_published_date}" - ) - return (needs_update, update_reason) - - -@shared_task -@set_code_owner_attribute -def dump_course_to_neo4j(course_key_string, connection_overrides=None): - """ - Serializes a course and writes it to neo4j. - - Arguments: - course_key_string: course key for the course to be exported - connection_overrides (dict): overrides to Neo4j connection - parameters specified in `settings.COURSEGRAPH_CONNECTION`. - """ - course_key = CourseKey.from_string(course_key_string) - nodes, relationships = serialize_course(course_key) - celery_log.info( - "Now dumping %s to neo4j: %d nodes and %d relationships", - course_key, - len(nodes), - len(relationships), - ) - - graph = authenticate_and_create_graph( - connection_overrides=connection_overrides - ) - - transaction = graph.begin() - course_string = str(course_key) - try: - # first, delete existing course - transaction.run( - "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format( - course_string - ) - ) - - # now, re-add it - add_to_transaction(nodes, transaction) - add_to_transaction(relationships, transaction) - graph.commit(transaction) - celery_log.info("Completed dumping %s to neo4j", course_key) - - except Exception: # pylint: disable=broad-except - celery_log.exception( - "Error trying to dump course %s to neo4j, rolling back", - course_string - ) - graph.rollback(transaction) - - -class ModuleStoreSerializer: - """ - Class with functionality to serialize a modulestore into subgraphs, - one graph per course. - """ - - def __init__(self, course_keys): - self.course_keys = course_keys - - @classmethod - def create(cls, courses=None, skip=None): - """ - Sets the object's course_keys attribute from the `courses` parameter. - If that parameter isn't furnished, loads all course_keys from the - modulestore. - Filters out course_keys in the `skip` parameter, if provided. - Args: - courses: A list of string serializations of course keys. - For example, ["course-v1:org+course+run"]. - skip: Also a list of string serializations of course keys. - """ - # Import is placed here to avoid model import at project startup. - from xmodule.modulestore.django import modulestore - if courses: - course_keys = [CourseKey.from_string(course.strip()) for course in courses] - else: - course_keys = [ - course.id for course in modulestore().get_course_summaries() - ] - if skip is not None: - skip_keys = [CourseKey.from_string(course.strip()) for course in skip] - course_keys = [course_key for course_key in course_keys if course_key not in skip_keys] - return cls(course_keys) - - def dump_courses_to_neo4j(self, connection_overrides=None, override_cache=False): - """ - Method that iterates through a list of courses in a modulestore, - serializes them, then submits tasks to write them to neo4j. - Arguments: - connection_overrides (dict): overrides to Neo4j connection - parameters specified in `settings.COURSEGRAPH_CONNECTION`. - override_cache: serialize the courses even if they'be been recently - serialized - - Returns: two lists--one of the courses that were successfully written - to neo4j and one of courses that were not. - """ - - total_number_of_courses = len(self.course_keys) - - submitted_courses = [] - skipped_courses = [] - - graph = authenticate_and_create_graph(connection_overrides) - - for index, course_key in enumerate(self.course_keys): - # first, clear the request cache to prevent memory leaks - RequestCache.clear_all_namespaces() - - (needs_dump, reason) = should_dump_course(course_key, graph) - if not (override_cache or needs_dump): - log.info("skipping submitting %s, since it hasn't changed", course_key) - skipped_courses.append(str(course_key)) - continue - - if override_cache: - reason = "override_cache is True" - - log.info( - "Now submitting %s for export to neo4j, because %s: course %d of %d total courses", - course_key, - reason, - index + 1, - total_number_of_courses, - ) - - dump_course_to_neo4j.apply_async( - kwargs=dict( - course_key_string=str(course_key), - connection_overrides=connection_overrides, - ) - ) - submitted_courses.append(str(course_key)) - - return submitted_courses, skipped_courses - - -def authenticate_and_create_graph(connection_overrides=None): - """ - This function authenticates with neo4j and creates a py2neo graph object - - Arguments: - connection_overrides (dict): overrides to Neo4j connection - parameters specified in `settings.COURSEGRAPH_CONNECTION`. - - Returns: a py2neo `Graph` object. - """ - provided_overrides = { - key: value - for key, value in (connection_overrides or {}).items() - # Drop overrides whose values are `None`. Note that `False` is a - # legitimate override value that we don't want to drop here. - if value is not None - } - connection_with_overrides = {**settings.COURSEGRAPH_CONNECTION, **provided_overrides} - return Graph(**connection_with_overrides) diff --git a/cms/djangoapps/coursegraph/tests/__init__.py b/cms/djangoapps/coursegraph/tests/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/cms/djangoapps/coursegraph/tests/test_admin.py b/cms/djangoapps/coursegraph/tests/test_admin.py deleted file mode 100644 index 21a26d84505b..000000000000 --- a/cms/djangoapps/coursegraph/tests/test_admin.py +++ /dev/null @@ -1,227 +0,0 @@ -""" -Shallow tests for CourseGraph dump-queueing Django admin interface. - -See ..management.commands.tests.test_dump_to_neo4j for more comprehensive -tests of dump_course_to_neo4j. -""" - -from unittest import mock - -import py2neo -from django.test import TestCase -from django.test.utils import override_settings -from freezegun import freeze_time - -from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory -from openedx.core.djangoapps.content.course_overviews.models import CourseOverview - -from .. import admin, tasks - - -_coursegraph_connection = { - "protocol": "bolt", - "secure": True, - "host": "example.edu", - "port": 7687, - "user": "neo4j", - "password": "fake-coursegraph-password", -} - -_configure_coursegraph_connection = override_settings( - COURSEGRAPH_CONNECTION=_coursegraph_connection, -) - -_patch_log_exception = mock.patch.object( - admin.log, 'exception', autospec=True -) - -_patch_apply_dump_task = mock.patch.object( - tasks.dump_course_to_neo4j, 'apply_async' -) - -_pretend_last_course_dump_was_may_2020 = mock.patch.object( - tasks, - 'get_command_last_run', - new=(lambda _key, _graph: "2020-05-01"), -) - -_patch_neo4j_graph = mock.patch.object( - tasks, 'Graph', autospec=True -) - -_make_neo4j_graph_raise = mock.patch.object( - tasks, 'Graph', side_effect=py2neo.ConnectionUnavailable( - 'we failed to connect or something!' - ) -) - - -class CourseGraphAdminActionsTestCase(TestCase): - """ - Test CourseGraph Django admin actions. - """ - - @classmethod - def setUpTestData(cls): - """ - Make course overviews with varying modification dates. - """ - super().setUpTestData() - cls.course_updated_in_april = CourseOverviewFactory(run='april_update') - cls.course_updated_in_june = CourseOverviewFactory(run='june_update') - cls.course_updated_in_july = CourseOverviewFactory(run='july_update') - cls.course_updated_in_august = CourseOverviewFactory(run='august_update') - - # For each course overview, make an arbitrary update and then save() - # so that its `.modified` date is set. - with freeze_time("2020-04-01"): - cls.course_updated_in_april.marketing_url = "https://example.org" - cls.course_updated_in_april.save() - with freeze_time("2020-06-01"): - cls.course_updated_in_june.marketing_url = "https://example.org" - cls.course_updated_in_june.save() - with freeze_time("2020-07-01"): - cls.course_updated_in_july.marketing_url = "https://example.org" - cls.course_updated_in_july.save() - with freeze_time("2020-08-01"): - cls.course_updated_in_august.marketing_url = "https://example.org" - cls.course_updated_in_august.save() - - @_configure_coursegraph_connection - @_pretend_last_course_dump_was_may_2020 - @_patch_neo4j_graph - @_patch_apply_dump_task - @_patch_log_exception - def test_dump_courses(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): - """ - Test that dump_courses admin action dumps requested courses iff they have - been modified since the last dump to coursegraph. - """ - modeladmin_mock = mock.MagicMock() - - # Request all courses except the August-updated one - requested_course_keys = { - str(self.course_updated_in_april.id), - str(self.course_updated_in_june.id), - str(self.course_updated_in_july.id), - } - admin.dump_courses( - modeladmin=modeladmin_mock, - request=mock.MagicMock(), - queryset=CourseOverview.objects.filter(id__in=requested_course_keys), - ) - - # User should have been messaged - assert modeladmin_mock.message_user.call_count == 1 - assert modeladmin_mock.message_user.call_args.args[1] == ( - "Enqueued dumps for 2 course(s). Skipped 1 unchanged course(s)." - ) - - # For enqueueing, graph should've been authenticated once, using configured settings. - assert mock_neo4j_graph.call_count == 1 - assert mock_neo4j_graph.call_args.args == () - assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection - - # No errors should've been logged. - assert mock_log_exception.call_count == 0 - - # April course should have been skipped because the command was last run in May. - # Dumps for June and July courses should have been enqueued. - assert mock_apply_dump_task.call_count == 2 - actual_dumped_course_keys = { - call_args.kwargs['kwargs']['course_key_string'] - for call_args in mock_apply_dump_task.call_args_list - } - expected_dumped_course_keys = { - str(self.course_updated_in_june.id), - str(self.course_updated_in_july.id), - } - assert actual_dumped_course_keys == expected_dumped_course_keys - - @_configure_coursegraph_connection - @_pretend_last_course_dump_was_may_2020 - @_patch_neo4j_graph - @_patch_apply_dump_task - @_patch_log_exception - def test_dump_courses_overriding_cache(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): - """ - Test that dump_coursese_overriding_cach admin action dumps requested courses - whether or not they been modified since the last dump to coursegraph. - """ - modeladmin_mock = mock.MagicMock() - - # Request all courses except the August-updated one - requested_course_keys = { - str(self.course_updated_in_april.id), - str(self.course_updated_in_june.id), - str(self.course_updated_in_july.id), - } - admin.dump_courses_overriding_cache( - modeladmin=modeladmin_mock, - request=mock.MagicMock(), - queryset=CourseOverview.objects.filter(id__in=requested_course_keys), - ) - - # User should have been messaged - assert modeladmin_mock.message_user.call_count == 1 - assert modeladmin_mock.message_user.call_args.args[1] == ( - "Enqueued dumps for 3 course(s)." - ) - - # For enqueueing, graph should've been authenticated once, using configured settings. - assert mock_neo4j_graph.call_count == 1 - assert mock_neo4j_graph.call_args.args == () - assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection - - # No errors should've been logged. - assert mock_log_exception.call_count == 0 - - # April, June, and July courses should have all been dumped. - assert mock_apply_dump_task.call_count == 3 - actual_dumped_course_keys = { - call_args.kwargs['kwargs']['course_key_string'] - for call_args in mock_apply_dump_task.call_args_list - } - expected_dumped_course_keys = { - str(self.course_updated_in_april.id), - str(self.course_updated_in_june.id), - str(self.course_updated_in_july.id), - } - assert actual_dumped_course_keys == expected_dumped_course_keys - - @_configure_coursegraph_connection - @_pretend_last_course_dump_was_may_2020 - @_make_neo4j_graph_raise - @_patch_apply_dump_task - @_patch_log_exception - def test_dump_courses_error(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): - """ - Test that the dump_courses admin action dumps messages the user if an error - occurs when trying to enqueue course dumps. - """ - modeladmin_mock = mock.MagicMock() - - # Request dump of all four courses. - admin.dump_courses( - modeladmin=modeladmin_mock, - request=mock.MagicMock(), - queryset=CourseOverview.objects.all() - ) - - # Admin user should have been messaged about failure. - assert modeladmin_mock.message_user.call_count == 1 - assert modeladmin_mock.message_user.call_args.args[1] == ( - "Error enqueueing dumps for 4 course(s): we failed to connect or something!" - ) - - # For enqueueing, graph should've been authenticated once, using configured settings. - assert mock_neo4j_graph.call_count == 1 - assert mock_neo4j_graph.call_args.args == () - assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection - - # Exception should have been logged. - assert mock_log_exception.call_count == 1 - assert "Failed to enqueue" in mock_log_exception.call_args.args[0] - - # No courses should have been dumped. - assert mock_apply_dump_task.call_count == 0 diff --git a/cms/envs/common.py b/cms/envs/common.py index ebc101801fe2..efd06351a7f5 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1755,9 +1755,6 @@ # edx-milestones service 'milestones', - # Coursegraph - 'cms.djangoapps.coursegraph.apps.CoursegraphConfig', - # Credit courses 'openedx.core.djangoapps.credit.apps.CreditConfig', @@ -2441,40 +2438,6 @@ # 11 grade designations are used by the UI, so it's advisable to restrict the list to 11 items. DEFAULT_GRADE_DESIGNATIONS = ['A', 'B', 'C', 'D'] -############## Settings for CourseGraph ############################ - -# .. setting_name: COURSEGRAPH_JOB_QUEUE -# .. setting_default: value of LOW_PRIORITY_QUEUE -# .. setting_description: The name of the Celery queue to which CourseGraph refresh -# tasks will be sent -COURSEGRAPH_JOB_QUEUE: str = LOW_PRIORITY_QUEUE - -# .. setting_name: COURSEGRAPH_CONNECTION -# .. setting_default: 'bolt+s://localhost:7687', in dictionary form. -# .. setting_description: Dictionary specifying Neo4j connection parameters for -# CourseGraph refresh. Accepted keys are protocol ('bolt' or 'http'), -# secure (bool), host (str), port (int), user (str), and password (str). -# See https://py2neo.org/2021.1/profiles.html#individual-settings for a -# a description of each of those keys. -COURSEGRAPH_CONNECTION: dict = { - "protocol": "bolt", - "secure": True, - "host": "localhost", - "port": 7687, - "user": "neo4j", - "password": None, -} - -# .. toggle_name: COURSEGRAPH_DUMP_COURSE_ON_PUBLISH -# .. toggle_implementation: DjangoSetting -# .. toggle_creation_date: 2022-01-27 -# .. toggle_use_cases: open_edx -# .. toggle_default: False -# .. toggle_description: Whether, upon publish, a course should automatically -# be exported to Neo4j via the connection parameters specified in -# `COURSEGRAPH_CONNECTION`. -COURSEGRAPH_DUMP_COURSE_ON_PUBLISH: bool = False - ########## Settings for video transcript migration tasks ############ VIDEO_TRANSCRIPT_MIGRATIONS_JOB_QUEUE = DEFAULT_PRIORITY_QUEUE diff --git a/cms/envs/devstack.py b/cms/envs/devstack.py index faacbc1548d0..e944d67eda1b 100644 --- a/cms/envs/devstack.py +++ b/cms/envs/devstack.py @@ -261,17 +261,6 @@ def should_show_debug_toolbar(request): # lint-amnesty, pylint: disable=missing # (ref MST-637) PROCTORING_USER_OBFUSCATION_KEY = '85920908f28904ed733fe576320db18cabd7b6cd' -############## CourseGraph devstack settings ############################ - -COURSEGRAPH_CONNECTION: dict = { - "protocol": "bolt", - "secure": False, - "host": "edx.devstack.coursegraph", - "port": 7687, - "user": "neo4j", - "password": "edx", -} - #################### Webpack Configuration Settings ############################## WEBPACK_LOADER['DEFAULT']['TIMEOUT'] = 5 diff --git a/cms/envs/production.py b/cms/envs/production.py index f65b2204328b..cf2a7d2f3fad 100644 --- a/cms/envs/production.py +++ b/cms/envs/production.py @@ -517,9 +517,6 @@ def get_env_setting(setting): if FEATURES.get('CUSTOM_COURSES_EDX'): INSTALLED_APPS.append('openedx.core.djangoapps.ccxcon.apps.CCXConnectorConfig') -############## Settings for CourseGraph ############################ -COURSEGRAPH_JOB_QUEUE = ENV_TOKENS.get('COURSEGRAPH_JOB_QUEUE', LOW_PRIORITY_QUEUE) - ########## Settings for video transcript migration tasks ############ VIDEO_TRANSCRIPT_MIGRATIONS_JOB_QUEUE = ENV_TOKENS.get('VIDEO_TRANSCRIPT_MIGRATIONS_JOB_QUEUE', DEFAULT_PRIORITY_QUEUE) @@ -616,8 +613,6 @@ def get_env_setting(setting): 'queue': SINGLE_LEARNER_COURSE_REGRADE_ROUTING_KEY}, 'cms.djangoapps.contentstore.tasks.update_search_index': { 'queue': UPDATE_SEARCH_INDEX_JOB_QUEUE}, - 'cms.djangoapps.coursegraph.tasks.dump_course_to_neo4j': { - 'queue': COURSEGRAPH_JOB_QUEUE}, } LOGO_IMAGE_EXTRA_TEXT = ENV_TOKENS.get('LOGO_IMAGE_EXTRA_TEXT', '') diff --git a/setup.cfg b/setup.cfg index fe776ec580a4..3af063a77666 100644 --- a/setup.cfg +++ b/setup.cfg @@ -114,7 +114,7 @@ ignore_imports = # -> openedx.core.djangoapps.course_groups.partition_scheme # -> lms.djangoapps.courseware.masquerade openedx.core.djangoapps.course_groups.partition_scheme -> lms.djangoapps.courseware.masquerade - # cms.djangoapps.contentstore.[various] & cms.djangoapps.coursegraph.[various] + # cms.djangoapps.contentstore.[various] # -> openedx.core.djangoapps.content.course_overviews.models # -> lms.djangoapps.ccx.utils # & lms.djangoapps.certificates.api