diff --git a/.github/workflows/unit-test-shards.json b/.github/workflows/unit-test-shards.json index f7cd11bf2c34..838d0fe261f2 100644 --- a/.github/workflows/unit-test-shards.json +++ b/.github/workflows/unit-test-shards.json @@ -99,7 +99,6 @@ "openedx/core/djangoapps/course_apps/", "openedx/core/djangoapps/course_date_signals/", "openedx/core/djangoapps/course_groups/", - "openedx/core/djangoapps/coursegraph/", "openedx/core/djangoapps/courseware_api/", "openedx/core/djangoapps/crawlers/", "openedx/core/djangoapps/credentials/", @@ -181,7 +180,6 @@ "openedx/core/djangoapps/course_apps/", "openedx/core/djangoapps/course_date_signals/", "openedx/core/djangoapps/course_groups/", - "openedx/core/djangoapps/coursegraph/", "openedx/core/djangoapps/courseware_api/", "openedx/core/djangoapps/crawlers/", "openedx/core/djangoapps/credentials/", @@ -240,6 +238,7 @@ "paths": [ "cms/djangoapps/api/", "cms/djangoapps/cms_user_tasks/", + "cms/djangoapps/coursegraph/", "cms/djangoapps/course_creators/", "cms/djangoapps/export_course_metadata/", "cms/djangoapps/maintenance/", diff --git a/cms/djangoapps/contentstore/signals/handlers.py b/cms/djangoapps/contentstore/signals/handlers.py index a67f9583814a..9945277a1379 100644 --- a/cms/djangoapps/contentstore/signals/handlers.py +++ b/cms/djangoapps/contentstore/signals/handlers.py @@ -5,6 +5,7 @@ from datetime import datetime from functools import wraps +from django.conf import settings from django.core.cache import cache from django.dispatch import receiver from pytz import UTC @@ -55,6 +56,9 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable= update_search_index, update_special_exams_and_publish ) + from cms.djangoapps.coursegraph.tasks import ( + dump_course_to_neo4j + ) # register special exams asynchronously course_key_str = str(course_key) @@ -64,6 +68,10 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable= # Push the course outline to learning_sequences asynchronously. update_outline_from_modulestore_task.delay(course_key_str) + if settings.COURSEGRAPH_DUMP_COURSE_ON_PUBLISH: + # Push the course out to CourseGraph asynchronously. + dump_course_to_neo4j.delay(course_key_str) + # Finally, call into the course search subsystem # to kick off an indexing action if CoursewareSearchIndexer.indexing_is_enabled() and CourseAboutSearchIndexer.indexing_is_enabled(): diff --git a/cms/djangoapps/coursegraph/README.rst b/cms/djangoapps/coursegraph/README.rst new file mode 100644 index 000000000000..18f0f60bdd33 --- /dev/null +++ b/cms/djangoapps/coursegraph/README.rst @@ -0,0 +1,120 @@ + +CourseGraph Support +------------------- + +This app exists to write data to "CourseGraph", a tool enabling Open edX developers and support specialists to inspect their platform instance's learning content. CourseGraph itself is simply an instance of `Neo4j`_, which is an open-source graph database with a Web interface. + +.. _Neo4j: https://neo4j.com + +Deploying Coursegraph +===================== + +There are two ways to deploy CourseGraph: + +* For operators using Tutor, there is a `CourseGraph plugin for Tutor`_ that is currently released as "Beta". Nutmeg is the earliest Open edX release that the plugin will work alongside. + +* For operators still using the old Ansible installation pathway, there exists a `neo4j Ansible playbook`_. Be warned that this method is not well-documented nor officially supported. + +In order for CourseGraph to have queryable, up-to-date data, learning content from CMS must be written to CourseGraph regularly. That is where this Django app comes into play. For details on the various ways to write CMS data to CourseGraph, visit the `operations section of the CourseGraph Tutor plugin docs`_. + +**Please note**: Access to a populated CourseGraph instance confers access to all the learning content in the associated Open edX CMS (Studio). The basic authentication provided by Neo4j may or may not be sufficient for your security needs. Consider taking additional security measures, such as restricting CourseGraph access to only users on a private VPN. + +.. _neo4j Ansible playbook: https://github.com/edx/configuration/blob/master/playbooks/neo4j.yml + +.. _CourseGraph plugin for Tutor: https://github.com/openedx/tutor-contrib-coursegraph/ + +.. _operations section of the CourseGraph Tutor plugin docs: https://github.com/openedx/tutor-contrib-coursegraph/#managing-data + +Running CourseGraph locally +=========================== + +In some circumstances, you may want to run CourseGraph locally, connected to a development-mode Open edX instance. You can do this in both Tutor and Devstack. + +Tutor +***** + +The `CourseGraph plugin for Tutor`_ makes it easy to install, configure, and run CourseGraph for local development. + +Devstack +******** + +CourseGraph is included as an "extra" component in the `Open edX Devstack`_. That is, it is not run or provisioned by default, but can be enabled on-demand. + +To provision Devstack CourseGraph with data from Devstack LMS, run:: + + make dev.provision.coursegraph + +CourseGraph should now be accessible at http://localhost:7474 with the username ``neo4j`` and the password ``edx``. + +Under the hood, the provisioning command just invokes ``dump_to_neo4j`` on your LMS, pointed at your CourseGraph. The provisioning command can be run again at any point in the future to refresh CourseGraph with new LMS data. The data in CourseGraph will persist unless you explicitly destroy it (as noted below). + +Other Devstack CourseGraph commands include:: + + make dev.up.coursegraph # Bring up the container (without re-provisioning). + make dev.down.coursegraph # Stop and remove the container. + make dev.shell.coursegraph # Start a shell session in the container. + make dev.attach.coursegraph # Attach to the container. + make dev.destroy.coursegraph # Stop the container and destroy its database. + +The above commands should be run in your ``devstack`` folder, and they assume that LMS is already properly provisioned. See the `Devstack interface`_ for more details. + +.. _Open edX Devstack: https://github.com/edx/devstack/ +.. _Devstack interface: https://edx.readthedocs.io/projects/open-edx-devstack/en/latest/devstack_interface.html + + +Querying Coursegraph +==================== + +CourseGraph is queryable using the `Cypher`_ query language. Open edX learning content is represented in Neo4j using a straightforward scheme: + +* A node is an XBlock usage. + +* Nodes are tagged with their ``block_type``, such as: + + * ``course`` + * ``chapter`` + * ``sequential`` + * ``vertical`` + * ``problem`` + * ``html`` + * etc. + +* Every node is also tagged with ``item``. + +* Parent-child relationships in the course hierarchy are reflected in the ``PARENT_OF`` relationship. + +* Ordered sibling relationships in the course hierarchy are reflected in the ``PRECEDES`` relationship. + +* Fields on each XBlock usage (``.display_name``, ``.data``, etc) are available on the corresponding node. + +.. _Cypher: https://neo4j.com/developer/cypher/ + + +Example Queries +*************** + +How many XBlocks exist in the LMS, by type? :: + + MATCH + (c:course) -[:PARENT_OF*]-> (n:item) + RETURN + distinct(n.block_type) as block_type, + count(n) as number + order by + number DESC + + +In a given course, which units contain problems with custom Python grading code? :: + + MATCH + (c:course) -[:PARENT_OF*]-> (u:vertical) -[:PARENT_OF*]-> (p:problem) + WHERE + p.data CONTAINS 'loncapa/python' + AND + c.course_key = '' + RETURN + u.location + +You can see many more examples of useful CourseGraph queries on the `query archive wiki page`_. + +.. _query archive wiki page: https://openedx.atlassian.net/wiki/spaces/COMM/pages/3273228388/Useful+CourseGraph+Queries diff --git a/openedx/core/djangoapps/coursegraph/__init__.py b/cms/djangoapps/coursegraph/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/__init__.py rename to cms/djangoapps/coursegraph/__init__.py diff --git a/cms/djangoapps/coursegraph/admin.py b/cms/djangoapps/coursegraph/admin.py new file mode 100644 index 000000000000..f79fa909d286 --- /dev/null +++ b/cms/djangoapps/coursegraph/admin.py @@ -0,0 +1,123 @@ +""" +Admin site bindings for coursegraph +""" +import logging + +from django.contrib import admin, messages +from django.utils.translation import gettext as _ +from edx_django_utils.admin.mixins import ReadOnlyAdminMixin + +from .models import CourseGraphCourseDump +from .tasks import ModuleStoreSerializer + +log = logging.getLogger(__name__) + + +@admin.action( + permissions=['change'], + description=_("Dump courses to CourseGraph (respect cache)"), +) +def dump_courses(modeladmin, request, queryset): + """ + Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses, + excluding courses that haven't been published since they were last dumped. + + queryset is a QuerySet of CourseGraphCourseDump objects, which are just + CourseOverview objects under the hood. + """ + all_course_keys = queryset.values_list('id', flat=True) + serializer = ModuleStoreSerializer(all_course_keys) + try: + submitted, skipped = serializer.dump_courses_to_neo4j() + # Unfortunately there is no unified base class for the reasonable + # exceptions we could expect from py2neo (connection unavailable, bolt protocol + # error, and so on), so we just catch broadly, show a generic error banner, + # and then log the exception for site operators to look at. + except Exception as err: # pylint: disable=broad-except + log.exception( + "Failed to enqueue CourseGraph dumps to Neo4j (respecting cache): %s", + ", ".join(str(course_key) for course_key in all_course_keys), + ) + modeladmin.message_user( + request, + _("Error enqueueing dumps for {} course(s): {}").format( + len(all_course_keys), str(err) + ), + level=messages.ERROR, + ) + return + if submitted: + modeladmin.message_user( + request, + _( + "Enqueued dumps for {} course(s). Skipped {} unchanged course(s)." + ).format(len(submitted), len(skipped)), + level=messages.SUCCESS, + ) + else: + modeladmin.message_user( + request, + _( + "Skipped all {} course(s), as they were unchanged.", + ).format(len(skipped)), + level=messages.WARNING, + ) + + +@admin.action( + permissions=['change'], + description=_("Dump courses to CourseGraph (override cache)") +) +def dump_courses_overriding_cache(modeladmin, request, queryset): + """ + Admin action to enqueue Dump-to-CourseGraph tasks for a set of courses + (whether or not they have been published recently). + + queryset is a QuerySet of CourseGraphCourseDump objects, which are just + CourseOverview objects under the hood. + """ + all_course_keys = queryset.values_list('id', flat=True) + serializer = ModuleStoreSerializer(all_course_keys) + try: + submitted, _skipped = serializer.dump_courses_to_neo4j(override_cache=True) + # Unfortunately there is no unified base class for the reasonable + # exceptions we could expect from py2neo (connection unavailable, bolt protocol + # error, and so on), so we just catch broadly, show a generic error banner, + # and then log the exception for site operators to look at. + except Exception as err: # pylint: disable=broad-except + log.exception( + "Failed to enqueue CourseGraph Neo4j course dumps (overriding cache): %s", + ", ".join(str(course_key) for course_key in all_course_keys), + ) + modeladmin.message_user( + request, + _("Error enqueueing dumps for {} course(s): {}").format( + len(all_course_keys), str(err) + ), + level=messages.ERROR, + ) + return + modeladmin.message_user( + request, + _("Enqueued dumps for {} course(s).").format(len(submitted)), + level=messages.SUCCESS, + ) + + +@admin.register(CourseGraphCourseDump) +class CourseGraphCourseDumpAdmin(ReadOnlyAdminMixin, admin.ModelAdmin): + """ + Model admin for "Course graph course dumps". + + Just a read-only table with some useful metadata, allowing admin users to + select courses to be dumped to CourseGraph. + """ + list_display = [ + 'id', + 'display_name', + 'modified', + 'enrollment_start', + 'enrollment_end', + ] + search_fields = ['id', 'display_name'] + actions = [dump_courses, dump_courses_overriding_cache] diff --git a/openedx/core/djangoapps/coursegraph/apps.py b/cms/djangoapps/coursegraph/apps.py similarity index 65% rename from openedx/core/djangoapps/coursegraph/apps.py rename to cms/djangoapps/coursegraph/apps.py index ecedf33d19ca..95d7873fce46 100644 --- a/openedx/core/djangoapps/coursegraph/apps.py +++ b/cms/djangoapps/coursegraph/apps.py @@ -12,6 +12,6 @@ class CoursegraphConfig(AppConfig): """ AppConfig for courseware app """ - name = 'openedx.core.djangoapps.coursegraph' + name = 'cms.djangoapps.coursegraph' - from openedx.core.djangoapps.coursegraph import tasks + from cms.djangoapps.coursegraph import tasks diff --git a/openedx/core/djangoapps/coursegraph/management/__init__.py b/cms/djangoapps/coursegraph/management/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/__init__.py rename to cms/djangoapps/coursegraph/management/__init__.py diff --git a/openedx/core/djangoapps/coursegraph/management/commands/__init__.py b/cms/djangoapps/coursegraph/management/commands/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/commands/__init__.py rename to cms/djangoapps/coursegraph/management/commands/__init__.py diff --git a/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py new file mode 100644 index 000000000000..40afe7ffbe7d --- /dev/null +++ b/cms/djangoapps/coursegraph/management/commands/dump_to_neo4j.py @@ -0,0 +1,114 @@ +""" +This file contains a management command for exporting the modulestore to +Neo4j, a graph database. + +Example usages: + + # Dump all courses published since last dump. + # Use connection parameters from `settings.COURSEGRAPH_SETTINGS`. + python manage.py cms dump_to_neo4j + + # Dump all courses published since last dump. + # Use custom connection parameters. + python manage.py cms dump_to_neo4j --host localhost --port 7473 \ + --secure --user user --password password + + # Specify certain courses instead of dumping all of them. + # Use connection parameters from `settings.COURSEGRAPH_SETTINGS`. + python manage.py cms dump_to_neo4j --courses 'course-v1:A+B+1' 'course-v1:A+B+2' +""" + + +import logging +from textwrap import dedent + +from django.core.management.base import BaseCommand + +from cms.djangoapps.coursegraph.tasks import ModuleStoreSerializer + +log = logging.getLogger(__name__) + + +class Command(BaseCommand): + """ + Dump recently-published course(s) over to a CourseGraph (Neo4j) instance. + """ + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument( + '--host', + type=str, + help="the hostname of the Neo4j server", + ) + parser.add_argument( + '--port', + type=int, + help="the port on the Neo4j server that accepts Bolt requests", + ) + parser.add_argument( + '--secure', + action='store_true', + help="connect to server over Bolt/TLS instead of plain unencrypted Bolt", + ) + parser.add_argument( + '--user', + type=str, + help="the username of the Neo4j user", + ) + parser.add_argument( + '--password', + type=str, + help="the password of the Neo4j user", + ) + parser.add_argument( + '--courses', + metavar='KEY', + type=str, + nargs='*', + help="keys of courses to serialize; if omitted all courses in system are serialized", + ) + parser.add_argument( + '--skip', + metavar='KEY', + type=str, + nargs='*', + help="keys of courses to NOT to serialize", + ) + parser.add_argument( + '--override', + action='store_true', + help="dump all courses regardless of when they were last published", + ) + + def handle(self, *args, **options): + """ + Iterates through each course, serializes them into graphs, and saves + those graphs to neo4j. + """ + + mss = ModuleStoreSerializer.create(options['courses'], options['skip']) + connection_overrides = { + key: options[key] + for key in ["host", "port", "secure", "user", "password"] + } + submitted_courses, skipped_courses = mss.dump_courses_to_neo4j( + connection_overrides=connection_overrides, + override_cache=options['override'], + ) + + log.info( + "%d courses submitted for export to neo4j. %d courses skipped.", + len(submitted_courses), + len(skipped_courses), + ) + + if not submitted_courses: + print("No courses submitted for export to neo4j at all!") + return + + if submitted_courses: + print( + "These courses were submitted for export to neo4j successfully:\n\t" + + "\n\t".join(submitted_courses) + ) diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/__init__.py b/cms/djangoapps/coursegraph/management/commands/tests/__init__.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/commands/tests/__init__.py rename to cms/djangoapps/coursegraph/management/commands/tests/__init__.py diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py similarity index 87% rename from openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py rename to cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py index 0d03f56d5fc5..32fef0a88711 100644 --- a/openedx/core/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py +++ b/cms/djangoapps/coursegraph/management/commands/tests/test_dump_to_neo4j.py @@ -8,15 +8,16 @@ from unittest import mock import ddt from django.core.management import call_command +from django.test.utils import override_settings from edx_toggles.toggles.testutils import override_waffle_switch from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory import openedx.core.djangoapps.content.block_structure.config as block_structure_config from openedx.core.djangoapps.content.block_structure.signals import update_block_structure_on_course_publish -from openedx.core.djangoapps.coursegraph.management.commands.dump_to_neo4j import ModuleStoreSerializer -from openedx.core.djangoapps.coursegraph.management.commands.tests.utils import MockGraph, MockNodeMatcher -from openedx.core.djangoapps.coursegraph.tasks import ( +from cms.djangoapps.coursegraph.management.commands.dump_to_neo4j import ModuleStoreSerializer +from cms.djangoapps.coursegraph.management.commands.tests.utils import MockGraph, MockNodeMatcher +from cms.djangoapps.coursegraph.tasks import ( coerce_types, serialize_course, serialize_item, @@ -115,8 +116,8 @@ class TestDumpToNeo4jCommand(TestDumpToNeo4jCommandBase): Tests for the dump to neo4j management command """ - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') @ddt.data(1, 2) def test_dump_specific_courses(self, number_of_courses, mock_graph_class, mock_matcher_class): """ @@ -140,8 +141,8 @@ def test_dump_specific_courses(self, number_of_courses, mock_graph_class, mock_m number_rollbacks=0 ) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') def test_dump_skip_course(self, mock_graph_class, mock_matcher_class): """ Test that you can skip courses. @@ -166,8 +167,8 @@ def test_dump_skip_course(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') def test_dump_skip_beats_specifying(self, mock_graph_class, mock_matcher_class): """ Test that if you skip and specify the same course, you'll skip it. @@ -193,8 +194,8 @@ def test_dump_skip_beats_specifying(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.Graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph') def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): """ Test if you don't specify which courses to dump, then you'll dump @@ -219,6 +220,48 @@ def test_dump_all_courses(self, mock_graph_class, mock_matcher_class): number_rollbacks=0, ) + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.Graph', autospec=True) + @override_settings( + COURSEGRAPH_CONNECTION=dict( + protocol='bolt', + host='coursegraph.example.edu', + port=7777, + secure=True, + user="neo4j", + password="default-password", + ) + ) + def test_dump_to_neo4j_connection_defaults(self, mock_graph_class, mock_matcher_class): + """ + Test that user can override individual settings.COURSEGRAPH_CONNECTION parameters + by passing them to `dump_to_neo4j`, whilst falling back to the ones that they + don't override. + """ + self.setup_mock_graph( + mock_matcher_class, mock_graph_class + ) + call_command( + 'dump_to_neo4j', + courses=self.course_strings[:1], + port=7788, + secure=False, + password="overridden-password", + ) + assert mock_graph_class.call_args.args == () + assert mock_graph_class.call_args.kwargs == dict( + + # From settings: + protocol='bolt', + host='coursegraph.example.edu', + user="neo4j", + + # Overriden by command: + port=7788, + secure=False, + password="overridden-password", + ) + class SomeThing: """Just to test the stringification of an object.""" @@ -395,8 +438,8 @@ def test_coerce_types(self, original_value, coerced_expected): coerced_value = coerce_types(original_value) assert coerced_value == coerced_expected - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') def test_dump_to_neo4j(self, mock_graph_constructor, mock_matcher_class): """ Tests the dump_to_neo4j method works against a mock @@ -423,8 +466,8 @@ def test_dump_to_neo4j(self, mock_graph_constructor, mock_matcher_class): assert len(mock_graph.nodes) == 11 self.assertCountEqual(submitted, self.course_strings) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') def test_dump_to_neo4j_rollback(self, mock_graph_constructor, mock_matcher_class): """ Tests that the the dump_to_neo4j method handles the case where there's @@ -447,8 +490,8 @@ def test_dump_to_neo4j_rollback(self, mock_graph_constructor, mock_matcher_class self.assertCountEqual(submitted, self.course_strings) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') @ddt.data((True, 2), (False, 0)) @ddt.unpack def test_dump_to_neo4j_cache( @@ -480,8 +523,8 @@ def test_dump_to_neo4j_cache( ) assert len(submitted) == expected_number_courses - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.NodeMatcher') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.authenticate_and_create_graph') + @mock.patch('cms.djangoapps.coursegraph.tasks.NodeMatcher') + @mock.patch('cms.djangoapps.coursegraph.tasks.authenticate_and_create_graph') def test_dump_to_neo4j_published(self, mock_graph_constructor, mock_matcher_class): """ Tests that we only dump those courses that have been published after @@ -506,8 +549,8 @@ def test_dump_to_neo4j_published(self, mock_graph_constructor, mock_matcher_clas assert len(submitted) == 1 assert submitted[0] == str(self.course.id) - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_course_last_published') - @mock.patch('openedx.core.djangoapps.coursegraph.tasks.get_command_last_run') + @mock.patch('cms.djangoapps.coursegraph.tasks.get_course_last_published') + @mock.patch('cms.djangoapps.coursegraph.tasks.get_command_last_run') @ddt.data( ( str(datetime(2016, 3, 30)), str(datetime(2016, 3, 31)), diff --git a/openedx/core/djangoapps/coursegraph/management/commands/tests/utils.py b/cms/djangoapps/coursegraph/management/commands/tests/utils.py similarity index 100% rename from openedx/core/djangoapps/coursegraph/management/commands/tests/utils.py rename to cms/djangoapps/coursegraph/management/commands/tests/utils.py diff --git a/cms/djangoapps/coursegraph/models.py b/cms/djangoapps/coursegraph/models.py new file mode 100644 index 000000000000..f053dc9993ac --- /dev/null +++ b/cms/djangoapps/coursegraph/models.py @@ -0,0 +1,21 @@ +""" +(Proxy) models supporting CourseGraph. +""" + +from openedx.core.djangoapps.content.course_overviews.models import CourseOverview + + +class CourseGraphCourseDump(CourseOverview): + """ + Proxy model for CourseOverview. + + Does *not* create/update/delete CourseOverview objects - only reads the objects. + Uses the course IDs of the CourseOverview objects to determine which courses + can be dumped to CourseGraph. + """ + class Meta: + proxy = True + + def __str__(self): + """Represent ourselves with the course key.""" + return str(self.id) diff --git a/openedx/core/djangoapps/coursegraph/tasks.py b/cms/djangoapps/coursegraph/tasks.py similarity index 84% rename from openedx/core/djangoapps/coursegraph/tasks.py rename to cms/djangoapps/coursegraph/tasks.py index ec943e752764..e2d4bf5b0976 100644 --- a/openedx/core/djangoapps/coursegraph/tasks.py +++ b/cms/djangoapps/coursegraph/tasks.py @@ -7,6 +7,7 @@ import logging from celery import shared_task +from django.conf import settings from django.utils import timezone from edx_django_utils.cache import RequestCache from edx_django_utils.monitoring import set_code_owner_attribute @@ -133,29 +134,26 @@ def get_command_last_run(course_key, graph): def get_course_last_published(course_key): """ - We use the CourseStructure table to get when this course was last - published. + Approximately when was a course last published? + + We use the 'modified' column in the CourseOverview table as a quick and easy + (although perhaps inexact) way of determining when a course was last + published. This works because CourseOverview rows are re-written upon + course publish. + Args: course_key: a CourseKey - Returns: The datetime the course was last published at, converted into - text, or None, if there's no record of the last time this course - was published. + Returns: The datetime the course was last published at, stringified. + Uses Python's default str(...) implementation for datetimes, which + is sortable and similar to ISO 8601: + https://docs.python.org/3/library/datetime.html#datetime.date.__str__ """ # Import is placed here to avoid model import at project startup. - from xmodule.modulestore.django import modulestore - from openedx.core.djangoapps.content.block_structure.models import BlockStructureModel - from openedx.core.djangoapps.content.block_structure.exceptions import BlockStructureNotFound + from openedx.core.djangoapps.content.course_overviews.models import CourseOverview - store = modulestore() - course_usage_key = store.make_course_usage_key(course_key) - try: - structure = BlockStructureModel.get(course_usage_key) - course_last_published_date = str(structure.modified) - except BlockStructureNotFound: - course_last_published_date = None - - return course_last_published_date + approx_last_published = CourseOverview.get_from_id(course_key).modified + return str(approx_last_published) def strip_branch_and_version(location): @@ -268,14 +266,14 @@ def should_dump_course(course_key, graph): @shared_task @set_code_owner_attribute -def dump_course_to_neo4j(course_key_string, credentials): +def dump_course_to_neo4j(course_key_string, connection_overrides=None): """ Serializes a course and writes it to neo4j. Arguments: - course_key: course key for the course to be exported - credentials (dict): the necessary credentials to connect - to neo4j and create a py2neo `Graph` obje + course_key_string: course key for the course to be exported + connection_overrides (dict): overrides to Neo4j connection + parameters specified in `settings.COURSEGRAPH_CONNECTION`. """ course_key = CourseKey.from_string(course_key_string) nodes, relationships = serialize_course(course_key) @@ -286,7 +284,9 @@ def dump_course_to_neo4j(course_key_string, credentials): len(relationships), ) - graph = authenticate_and_create_graph(credentials) + graph = authenticate_and_create_graph( + connection_overrides=connection_overrides + ) transaction = graph.begin() course_string = str(course_key) @@ -346,13 +346,13 @@ def create(cls, courses=None, skip=None): course_keys = [course_key for course_key in course_keys if course_key not in skip_keys] return cls(course_keys) - def dump_courses_to_neo4j(self, credentials, override_cache=False): + def dump_courses_to_neo4j(self, connection_overrides=None, override_cache=False): """ Method that iterates through a list of courses in a modulestore, serializes them, then submits tasks to write them to neo4j. Arguments: - credentials (dict): the necessary credentials to connect - to neo4j and create a py2neo `Graph` object + connection_overrides (dict): overrides to Neo4j connection + parameters specified in `settings.COURSEGRAPH_CONNECTION`. override_cache: serialize the courses even if they'be been recently serialized @@ -365,7 +365,7 @@ def dump_courses_to_neo4j(self, credentials, override_cache=False): submitted_courses = [] skipped_courses = [] - graph = authenticate_and_create_graph(credentials) + graph = authenticate_and_create_graph(connection_overrides) for index, course_key in enumerate(self.course_keys): # first, clear the request cache to prevent memory leaks @@ -389,36 +389,32 @@ def dump_courses_to_neo4j(self, credentials, override_cache=False): ) dump_course_to_neo4j.apply_async( - args=[str(course_key), credentials], + kwargs=dict( + course_key_string=str(course_key), + connection_overrides=connection_overrides, + ) ) submitted_courses.append(str(course_key)) return submitted_courses, skipped_courses -def authenticate_and_create_graph(credentials): +def authenticate_and_create_graph(connection_overrides=None): """ This function authenticates with neo4j and creates a py2neo graph object + Arguments: - credentials (dict): a dictionary of credentials used to authenticate, - and then create, a py2neo graph object. + connection_overrides (dict): overrides to Neo4j connection + parameters specified in `settings.COURSEGRAPH_CONNECTION`. Returns: a py2neo `Graph` object. """ - - host = credentials['host'] - port = credentials['port'] - secure = credentials['secure'] - neo4j_user = credentials['user'] - neo4j_password = credentials['password'] - - graph = Graph( - protocol='bolt', - password=neo4j_password, - user=neo4j_user, - address=host, - port=port, - secure=secure, - ) - - return graph + provided_overrides = { + key: value + for key, value in (connection_overrides or {}).items() + # Drop overrides whose values are `None`. Note that `False` is a + # legitimate override value that we don't want to drop here. + if value is not None + } + connection_with_overrides = {**settings.COURSEGRAPH_CONNECTION, **provided_overrides} + return Graph(**connection_with_overrides) diff --git a/cms/djangoapps/coursegraph/tests/__init__.py b/cms/djangoapps/coursegraph/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/cms/djangoapps/coursegraph/tests/test_admin.py b/cms/djangoapps/coursegraph/tests/test_admin.py new file mode 100644 index 000000000000..21a26d84505b --- /dev/null +++ b/cms/djangoapps/coursegraph/tests/test_admin.py @@ -0,0 +1,227 @@ +""" +Shallow tests for CourseGraph dump-queueing Django admin interface. + +See ..management.commands.tests.test_dump_to_neo4j for more comprehensive +tests of dump_course_to_neo4j. +""" + +from unittest import mock + +import py2neo +from django.test import TestCase +from django.test.utils import override_settings +from freezegun import freeze_time + +from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory +from openedx.core.djangoapps.content.course_overviews.models import CourseOverview + +from .. import admin, tasks + + +_coursegraph_connection = { + "protocol": "bolt", + "secure": True, + "host": "example.edu", + "port": 7687, + "user": "neo4j", + "password": "fake-coursegraph-password", +} + +_configure_coursegraph_connection = override_settings( + COURSEGRAPH_CONNECTION=_coursegraph_connection, +) + +_patch_log_exception = mock.patch.object( + admin.log, 'exception', autospec=True +) + +_patch_apply_dump_task = mock.patch.object( + tasks.dump_course_to_neo4j, 'apply_async' +) + +_pretend_last_course_dump_was_may_2020 = mock.patch.object( + tasks, + 'get_command_last_run', + new=(lambda _key, _graph: "2020-05-01"), +) + +_patch_neo4j_graph = mock.patch.object( + tasks, 'Graph', autospec=True +) + +_make_neo4j_graph_raise = mock.patch.object( + tasks, 'Graph', side_effect=py2neo.ConnectionUnavailable( + 'we failed to connect or something!' + ) +) + + +class CourseGraphAdminActionsTestCase(TestCase): + """ + Test CourseGraph Django admin actions. + """ + + @classmethod + def setUpTestData(cls): + """ + Make course overviews with varying modification dates. + """ + super().setUpTestData() + cls.course_updated_in_april = CourseOverviewFactory(run='april_update') + cls.course_updated_in_june = CourseOverviewFactory(run='june_update') + cls.course_updated_in_july = CourseOverviewFactory(run='july_update') + cls.course_updated_in_august = CourseOverviewFactory(run='august_update') + + # For each course overview, make an arbitrary update and then save() + # so that its `.modified` date is set. + with freeze_time("2020-04-01"): + cls.course_updated_in_april.marketing_url = "https://example.org" + cls.course_updated_in_april.save() + with freeze_time("2020-06-01"): + cls.course_updated_in_june.marketing_url = "https://example.org" + cls.course_updated_in_june.save() + with freeze_time("2020-07-01"): + cls.course_updated_in_july.marketing_url = "https://example.org" + cls.course_updated_in_july.save() + with freeze_time("2020-08-01"): + cls.course_updated_in_august.marketing_url = "https://example.org" + cls.course_updated_in_august.save() + + @_configure_coursegraph_connection + @_pretend_last_course_dump_was_may_2020 + @_patch_neo4j_graph + @_patch_apply_dump_task + @_patch_log_exception + def test_dump_courses(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): + """ + Test that dump_courses admin action dumps requested courses iff they have + been modified since the last dump to coursegraph. + """ + modeladmin_mock = mock.MagicMock() + + # Request all courses except the August-updated one + requested_course_keys = { + str(self.course_updated_in_april.id), + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + admin.dump_courses( + modeladmin=modeladmin_mock, + request=mock.MagicMock(), + queryset=CourseOverview.objects.filter(id__in=requested_course_keys), + ) + + # User should have been messaged + assert modeladmin_mock.message_user.call_count == 1 + assert modeladmin_mock.message_user.call_args.args[1] == ( + "Enqueued dumps for 2 course(s). Skipped 1 unchanged course(s)." + ) + + # For enqueueing, graph should've been authenticated once, using configured settings. + assert mock_neo4j_graph.call_count == 1 + assert mock_neo4j_graph.call_args.args == () + assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection + + # No errors should've been logged. + assert mock_log_exception.call_count == 0 + + # April course should have been skipped because the command was last run in May. + # Dumps for June and July courses should have been enqueued. + assert mock_apply_dump_task.call_count == 2 + actual_dumped_course_keys = { + call_args.kwargs['kwargs']['course_key_string'] + for call_args in mock_apply_dump_task.call_args_list + } + expected_dumped_course_keys = { + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + assert actual_dumped_course_keys == expected_dumped_course_keys + + @_configure_coursegraph_connection + @_pretend_last_course_dump_was_may_2020 + @_patch_neo4j_graph + @_patch_apply_dump_task + @_patch_log_exception + def test_dump_courses_overriding_cache(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): + """ + Test that dump_coursese_overriding_cach admin action dumps requested courses + whether or not they been modified since the last dump to coursegraph. + """ + modeladmin_mock = mock.MagicMock() + + # Request all courses except the August-updated one + requested_course_keys = { + str(self.course_updated_in_april.id), + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + admin.dump_courses_overriding_cache( + modeladmin=modeladmin_mock, + request=mock.MagicMock(), + queryset=CourseOverview.objects.filter(id__in=requested_course_keys), + ) + + # User should have been messaged + assert modeladmin_mock.message_user.call_count == 1 + assert modeladmin_mock.message_user.call_args.args[1] == ( + "Enqueued dumps for 3 course(s)." + ) + + # For enqueueing, graph should've been authenticated once, using configured settings. + assert mock_neo4j_graph.call_count == 1 + assert mock_neo4j_graph.call_args.args == () + assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection + + # No errors should've been logged. + assert mock_log_exception.call_count == 0 + + # April, June, and July courses should have all been dumped. + assert mock_apply_dump_task.call_count == 3 + actual_dumped_course_keys = { + call_args.kwargs['kwargs']['course_key_string'] + for call_args in mock_apply_dump_task.call_args_list + } + expected_dumped_course_keys = { + str(self.course_updated_in_april.id), + str(self.course_updated_in_june.id), + str(self.course_updated_in_july.id), + } + assert actual_dumped_course_keys == expected_dumped_course_keys + + @_configure_coursegraph_connection + @_pretend_last_course_dump_was_may_2020 + @_make_neo4j_graph_raise + @_patch_apply_dump_task + @_patch_log_exception + def test_dump_courses_error(self, mock_log_exception, mock_apply_dump_task, mock_neo4j_graph): + """ + Test that the dump_courses admin action dumps messages the user if an error + occurs when trying to enqueue course dumps. + """ + modeladmin_mock = mock.MagicMock() + + # Request dump of all four courses. + admin.dump_courses( + modeladmin=modeladmin_mock, + request=mock.MagicMock(), + queryset=CourseOverview.objects.all() + ) + + # Admin user should have been messaged about failure. + assert modeladmin_mock.message_user.call_count == 1 + assert modeladmin_mock.message_user.call_args.args[1] == ( + "Error enqueueing dumps for 4 course(s): we failed to connect or something!" + ) + + # For enqueueing, graph should've been authenticated once, using configured settings. + assert mock_neo4j_graph.call_count == 1 + assert mock_neo4j_graph.call_args.args == () + assert mock_neo4j_graph.call_args.kwargs == _coursegraph_connection + + # Exception should have been logged. + assert mock_log_exception.call_count == 1 + assert "Failed to enqueue" in mock_log_exception.call_args.args[0] + + # No courses should have been dumped. + assert mock_apply_dump_task.call_count == 0 diff --git a/cms/envs/common.py b/cms/envs/common.py index 2cad881cec78..759f474d68e9 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1611,7 +1611,7 @@ 'openedx.core.djangoapps.self_paced', # Coursegraph - 'openedx.core.djangoapps.coursegraph.apps.CoursegraphConfig', + 'cms.djangoapps.coursegraph.apps.CoursegraphConfig', # Credit courses 'openedx.core.djangoapps.credit.apps.CreditConfig', @@ -2235,7 +2235,33 @@ # .. setting_default: value of LOW_PRIORITY_QUEUE # .. setting_description: The name of the Celery queue to which CourseGraph refresh # tasks will be sent -COURSEGRAPH_JOB_QUEUE = LOW_PRIORITY_QUEUE +COURSEGRAPH_JOB_QUEUE: str = LOW_PRIORITY_QUEUE + +# .. setting_name: COURSEGRAPH_CONNECTION +# .. setting_default: 'bolt+s://localhost:7687', in dictionary form. +# .. setting_description: Dictionary specifying Neo4j connection parameters for +# CourseGraph refresh. Accepted keys are protocol ('bolt' or 'http'), +# secure (bool), host (str), port (int), user (str), and password (str). +# See https://py2neo.org/2021.1/profiles.html#individual-settings for a +# a description of each of those keys. +COURSEGRAPH_CONNECTION: dict = { + "protocol": "bolt", + "secure": True, + "host": "localhost", + "port": 7687, + "user": "neo4j", + "password": None, +} + +# .. toggle_name: COURSEGRAPH_DUMP_COURSE_ON_PUBLISH +# .. toggle_implementation: DjangoSetting +# .. toggle_creation_date: 2022-01-27 +# .. toggle_use_cases: open_edx +# .. toggle_default: False +# .. toggle_description: Whether, upon publish, a course should automatically +# be exported to Neo4j via the connection parameters specified in +# `COURSEGRAPH_CONNECTION`. +COURSEGRAPH_DUMP_COURSE_ON_PUBLISH: bool = False ########## Settings for video transcript migration tasks ############ VIDEO_TRANSCRIPT_MIGRATIONS_JOB_QUEUE = DEFAULT_PRIORITY_QUEUE diff --git a/cms/envs/devstack.py b/cms/envs/devstack.py index 6b632ddaf11f..6c67d4d3891e 100644 --- a/cms/envs/devstack.py +++ b/cms/envs/devstack.py @@ -256,6 +256,17 @@ def should_show_debug_toolbar(request): # lint-amnesty, pylint: disable=missing # (ref MST-637) PROCTORING_USER_OBFUSCATION_KEY = '85920908f28904ed733fe576320db18cabd7b6cd' +############## CourseGraph devstack settings ############################ + +COURSEGRAPH_CONNECTION: dict = { + "protocol": "bolt", + "secure": False, + "host": "edx.devstack.coursegraph", + "port": 7687, + "user": "neo4j", + "password": "edx", +} + #################### Webpack Configuration Settings ############################## WEBPACK_LOADER['DEFAULT']['TIMEOUT'] = 5 diff --git a/cms/envs/production.py b/cms/envs/production.py index 8b29e3d3f378..82244acbde9c 100644 --- a/cms/envs/production.py +++ b/cms/envs/production.py @@ -603,7 +603,7 @@ def get_env_setting(setting): 'queue': POLICY_CHANGE_GRADES_ROUTING_KEY}, 'cms.djangoapps.contentstore.tasks.update_search_index': { 'queue': UPDATE_SEARCH_INDEX_JOB_QUEUE}, - 'openedx.core.djangoapps.coursegraph.tasks.dump_course_to_neo4j': { + 'cms.djangoapps.coursegraph.tasks.dump_course_to_neo4j': { 'queue': COURSEGRAPH_JOB_QUEUE}, } diff --git a/lms/envs/common.py b/lms/envs/common.py index 83709b11db4b..bf81a9769ecb 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -3083,10 +3083,6 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring 'openedx.core.djangoapps.content.block_structure.apps.BlockStructureConfig', 'lms.djangoapps.course_blocks', - - # Coursegraph - 'openedx.core.djangoapps.coursegraph.apps.CoursegraphConfig', - # Mailchimp Syncing 'lms.djangoapps.mailing', diff --git a/openedx/core/djangoapps/coursegraph/README.rst b/openedx/core/djangoapps/coursegraph/README.rst deleted file mode 100644 index 0f9089ea42f2..000000000000 --- a/openedx/core/djangoapps/coursegraph/README.rst +++ /dev/null @@ -1,99 +0,0 @@ - -Coursegraph Support -------------------- - -This app exists to write data to "Coursegraph", a tool enabling Open edX developers and support specialists to inspect their platform instance's learning content. Coursegraph itself is simply an instance of Neo4j, which is an open-source graph database with a web interface. - -Deploying Coursegraph -===================== - -As of the Maple Open edX release, Coursegraph is *not* automatically provisioned by the community installation, and is *not* considered a "supported" part of the platform. However, operators may find the `neo4j Ansible playbook`_ useful as a starting point for deploying their own Coursegraph instance. Alternatively, Neo4j also maintains an official `Docker image`_. - -In order for Coursegraph to have queryable data, learning content from LMS must be written to Coursegraph using the ``dump_to_neo4j`` management command included in this app. In order for the data to stay up to date, it must be periodically refreshed, either manually or via an automation server such as Jenkins. - -**Please note**: Access to a populated Coursegraph instance confers access to all the learning content in the related Open edX LMS/CMS. The basic authentication provided by Neo4j may or may not be sufficient for your security needs. Consider taking additional security measures, such as restricting Coursegraph access to only users on a private VPN. - -.. _neo4j Ansible playbook: https://github.com/edx/configuration/blob/master/playbooks/neo4j.yml - -.. _Docker image: https://neo4j.com/developer/docker-run-neo4j/ - - -Coursegraph in Devstack -======================= - -Coursegraph is included as an "extra" component in the `Open edX Devstack`_. That is, it is not run or provisioned by default, but can be enabled on-demand. - -To provision Devstack Coursegraph with data from Devstack LMS, run:: - - make dev.provision.coursegraph - -Coursegraph should now be accessible at http://localhost:7474 with the username ``neo4j`` and the password ``edx``. - -Under the hood, the provisioning command just invokes ``dump_to_neo4j`` on your LMS, pointed at your Coursegraph. The provisioning command can be run again at any point in the future to refresh Coursegraph with new LMS data. The data in Coursegraph will persist unless you explicitly destroy it (as noted below). - -Other Devstack Coursegraph commands include:: - - make dev.up.coursegraph # Bring up the container (without re-provisioning). - make dev.down.coursegraph # Stop and remove the container. - make dev.shell.coursegraph # Start a shell session in the container. - make dev.attach.coursegraph # Attach to the container. - make dev.destroy.coursegraph # Stop the container and destroy its database. - -The above commands should be run in your ``devstack`` folder, and they assume that LMS is already properly provisioned. See the `Devstack interface`_ for more details. - -.. _Open edX Devstack: https://github.com/edx/devstack/ -.. _Devstack interface: https://edx.readthedocs.io/projects/open-edx-devstack/en/latest/devstack_interface.html - - -Querying Coursegraph -==================== - -Coursegraph is queryable using the `Cypher`_ query language. Open edX learning content is represented in Neo4j using a straightforward scheme: - -* A node is an XBlock usage. - -* Nodes are tagged with their ``block_type``, such as: - - * ``course`` - * ``chapter`` - * ``sequential`` - * ``vertical`` - * ``problem`` - * ``html`` - * etc. - -* Every node is also tagged with ``item``. - -* Parent-child relationships in the course hierarchy are reflected in the ``PARENT_OF`` relationship. - -* Ordered sibling relationships in the course hierarchy are reflected in the ``PRECEDES`` relationship. - -* Fields on each XBlock usage (``.display_name``, ``.data``, etc) are available on the corresponding node. - -.. _Cypher: https://neo4j.com/developer/cypher/ - - -Example Queries -*************** - -How many XBlocks exist in the LMS, by type? :: - - MATCH - (c:course) -[:PARENT_OF*]-> (n:item) - RETURN - distinct(n.block_type) as block_type, - count(n) as number - order by - number DESC - - -In a given course, which units contain problems with custom Python grading code? :: - - MATCH - (c:course) -[:PARENT_OF*]-> (u:vertical) -[:PARENT_OF*]-> (p:problem) - WHERE - p.data CONTAINS 'loncapa/python' - AND - c.course_key = '' - RETURN - u.location diff --git a/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py b/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py deleted file mode 100644 index 226b3c174a7a..000000000000 --- a/openedx/core/djangoapps/coursegraph/management/commands/dump_to_neo4j.py +++ /dev/null @@ -1,79 +0,0 @@ -""" -This file contains a management command for exporting the modulestore to -neo4j, a graph database. -""" - - -import logging -from textwrap import dedent - -from django.core.management.base import BaseCommand - -from openedx.core.djangoapps.coursegraph.tasks import ModuleStoreSerializer - -log = logging.getLogger(__name__) - - -class Command(BaseCommand): - """ - Command to dump modulestore data to neo4j - - Takes the following named arguments: - host: the host of the neo4j server - port: the port on the neo4j server that accepts Bolt requests - secure: if set, connects to server over Bolt/TLS, otherwise uses Bolt - user: the username for the neo4j user - password: the user's password - courses: list of course key strings to serialize. If not specified, all - courses in the modulestore are serialized. - override: if true, dump all--or all specified--courses, regardless of when - they were last dumped. If false, or not set, only dump those courses that - were updated since the last time the command was run. - - Example usage: - python manage.py lms dump_to_neo4j --host localhost --https_port 7473 \ - --secure --user user --password password --settings=production - """ - help = dedent(__doc__).strip() - - def add_arguments(self, parser): - parser.add_argument('--host', type=str) - parser.add_argument('--port', type=int, default=7687) - parser.add_argument('--secure', action='store_true') - parser.add_argument('--user', type=str) - parser.add_argument('--password', type=str) - parser.add_argument('--courses', type=str, nargs='*') - parser.add_argument('--skip', type=str, nargs='*') - parser.add_argument( - '--override', - action='store_true', - help='dump all--or all specified--courses, ignoring cache', - ) - - def handle(self, *args, **options): - """ - Iterates through each course, serializes them into graphs, and saves - those graphs to neo4j. - """ - - mss = ModuleStoreSerializer.create(options['courses'], options['skip']) - - submitted_courses, skipped_courses = mss.dump_courses_to_neo4j( - options, override_cache=options['override'] - ) - - log.info( - "%d courses submitted for export to neo4j. %d courses skipped.", - len(submitted_courses), - len(skipped_courses), - ) - - if not submitted_courses: - print("No courses submitted for export to neo4j at all!") - return - - if submitted_courses: - print( - "These courses were submitted for export to neo4j successfully:\n\t" + - "\n\t".join(submitted_courses) - )