From 7dc81d95eeb163781395898201c9f5e3dda627de Mon Sep 17 00:00:00 2001 From: Danyal-Faheem Date: Wed, 21 Aug 2024 18:39:08 +0500 Subject: [PATCH] feat: add do command to upgrade the charset and collation of MySQL tables --- ...131_danyal.faheem_upgrade_mysql_utf8mb4.md | 7 + docs/local.rst | 27 ++++ docs/tutorials/datamigration.rst | 2 + tests/commands/test_jobs.py | 75 +++++++++++ tutor/commands/jobs.py | 104 ++++++++++++++ tutor/commands/upgrade/compose.py | 6 + tutor/utils.py | 127 ++++++++++++++++++ 7 files changed, 348 insertions(+) create mode 100644 changelog.d/20240520_150131_danyal.faheem_upgrade_mysql_utf8mb4.md diff --git a/changelog.d/20240520_150131_danyal.faheem_upgrade_mysql_utf8mb4.md b/changelog.d/20240520_150131_danyal.faheem_upgrade_mysql_utf8mb4.md new file mode 100644 index 0000000000..33d9d84c1b --- /dev/null +++ b/changelog.d/20240520_150131_danyal.faheem_upgrade_mysql_utf8mb4.md @@ -0,0 +1,7 @@ +- 💥[Feature] Upgrade default charset and collation of mysql to utf8mb4 and utf8mb4_unicode_ci respectively (by @Danyal-Faheem) + - Add do command to upgrade the charset and collation of tables in mysql. + - The command will perform the following upgrades: + - Upgrade all `utf8mb3` charset to `utf8mb4` + - Upgrade collation `utf8mb3_general_ci` to `utf8mb4_unicode_ci` + - Upgrade collation `utf8mb3_bin` to `utf8mb4_bin` + - Upgrade collation `utf8mb3_*` to `utf8mb4_*` \ No newline at end of file diff --git a/docs/local.rst b/docs/local.rst index 7f3ebcb307..0064148e9c 100644 --- a/docs/local.rst +++ b/docs/local.rst @@ -141,6 +141,33 @@ The default Open edX theme is rather bland, so Tutor makes it easy to switch to Out of the box, only the default "open-edx" theme is available. We also developed `Indigo, a beautiful, customizable theme `__ which is easy to install with Tutor. +Changing the mysql charset and collation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: This command has been tested only for users upgrading from Quince. 
While it is expected to work for users on earlier releases, please use it with caution as it has not been tested with those versions. + +Your database's charset and collation might not support specific characters or emojis. Tutor will run fine without this change unless you explicitly use specific characters in your instance. + +.. warning:: This change is potentially irreversible. It is recommended to make a backup of the MySQL database. See the :ref:`database dump instructions <database_dumps>` to create a DB dump. + +To change the charset and collation of all the tables in the openedx database, run:: + + tutor local do convert-mysql-utf8mb4-charset + +Alternatively, if you only want to change the charset and collation of certain tables or exclude certain tables, you can use the ``--include`` or ``--exclude`` options. These options take comma-separated names of tables/apps with no space in-between. To upgrade the ``courseware_studentmodule`` and ``courseware_studentmodulehistory`` tables, run:: + + tutor local do convert-mysql-utf8mb4-charset --include=courseware_studentmodule,courseware_studentmodulehistory + +Tutor performs pattern matching from the start of the table name so you can just enter the name of the app to include/exclude all the tables under that app. To upgrade all the tables in the database except the ones under the student and wiki apps, run:: + + tutor local do convert-mysql-utf8mb4-charset --exclude=student,wiki + +In the above command, all the tables whose name starts with either student or wiki will be excluded from the upgrade process. + +By default, only the tables in the openedx database are changed. If you are running any plugins with their own databases, you can upgrade them by utilizing the ``--database`` option. 
To upgrade all the tables in the discovery database, run:: + + tutor local do convert-mysql-utf8mb4-charset --database=discovery + Running arbitrary ``manage.py`` commands ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/tutorials/datamigration.rst b/docs/tutorials/datamigration.rst index e7923a6463..63e4506568 100644 --- a/docs/tutorials/datamigration.rst +++ b/docs/tutorials/datamigration.rst @@ -22,6 +22,8 @@ With Tutor, all data are stored in a single folder. This means that it's extreme tutor local start -d +.. _database_dumps: + Making database dumps --------------------- diff --git a/tests/commands/test_jobs.py b/tests/commands/test_jobs.py index 2ab388773b..d519629c83 100644 --- a/tests/commands/test_jobs.py +++ b/tests/commands/test_jobs.py @@ -90,3 +90,78 @@ def test_set_theme(self) -> None: self.assertIn("lms-job", dc_args) self.assertIn("assign_theme('beautiful', 'domain1')", dc_args[-1]) self.assertIn("assign_theme('beautiful', 'domain2')", dc_args[-1]) + + def test_convert_mysql_utf8mb4_charset_all_tables(self) -> None: + with temporary_root() as root: + self.invoke_in_root(root, ["config", "save"]) + with patch("tutor.utils.docker_compose") as mock_docker_compose: + result = self.invoke_in_root( + root, + [ + "local", + "do", + "convert-mysql-utf8mb4-charset", + "--non-interactive", + ], + ) + dc_args, _dc_kwargs = mock_docker_compose.call_args + + self.assertIsNone(result.exception) + self.assertEqual(0, result.exit_code) + self.assertIn("lms-job", dc_args) + self.assertIn("utf8mb4", dc_args[-1]) + self.assertIn("openedx", dc_args[-1]) + self.assertIn("utf8mb4_unicode_ci", dc_args[-1]) + self.assertNotIn("regexp", dc_args[-1]) + + def test_convert_mysql_utf8mb4_charset_include_tables(self) -> None: + with temporary_root() as root: + self.invoke_in_root(root, ["config", "save"]) + with patch("tutor.utils.docker_compose") as mock_docker_compose: + result = self.invoke_in_root( + root, + [ + "local", + "do", + "convert-mysql-utf8mb4-charset", 
+ "--include=courseware_studentmodule,xblock", + ], + ) + dc_args, _dc_kwargs = mock_docker_compose.call_args + + self.assertIsNone(result.exception) + self.assertEqual(0, result.exit_code) + self.assertIn("lms-job", dc_args) + self.assertIn("openedx", dc_args[-1]) + self.assertIn("utf8mb4", dc_args[-1]) + self.assertIn("utf8mb4_unicode_ci", dc_args[-1]) + self.assertIn("regexp", dc_args[-1]) + self.assertIn("courseware_studentmodule", dc_args[-1]) + self.assertIn("xblock", dc_args[-1]) + + def test_convert_mysql_utf8mb4_charset_exclude_tables(self) -> None: + with temporary_root() as root: + self.invoke_in_root(root, ["config", "save"]) + with patch("tutor.utils.docker_compose") as mock_docker_compose: + result = self.invoke_in_root( + root, + [ + "local", + "do", + "convert-mysql-utf8mb4-charset", + "--database=discovery", + "--exclude=course,auth", + ], + ) + dc_args, _dc_kwargs = mock_docker_compose.call_args + + self.assertIsNone(result.exception) + self.assertEqual(0, result.exit_code) + self.assertIn("lms-job", dc_args) + self.assertIn("utf8mb4", dc_args[-1]) + self.assertIn("utf8mb4_unicode_ci", dc_args[-1]) + self.assertIn("discovery", dc_args[-1]) + self.assertIn("regexp", dc_args[-1]) + self.assertIn("NOT", dc_args[-1]) + self.assertIn("course", dc_args[-1]) + self.assertIn("auth", dc_args[-1]) diff --git a/tutor/commands/jobs.py b/tutor/commands/jobs.py index 7510a83b31..cea579ae31 100644 --- a/tutor/commands/jobs.py +++ b/tutor/commands/jobs.py @@ -13,6 +13,7 @@ from tutor import config as tutor_config from tutor import env, fmt, hooks +from tutor.utils import get_mysql_change_charset_query from tutor.hooks import priorities @@ -315,6 +316,108 @@ def sqlshell(args: list[str]) -> t.Iterable[tuple[str, str]]: yield ("lms", command) +@click.command( + short_help="Convert the charset and collation of mysql to utf8mb4.", + help=( + "Convert the charset and collation of mysql to utf8mb4. 
You can either upgrade all tables, specify only certain tables to upgrade or specify certain tables to exclude from the upgrade process" + ), + context_settings={"ignore_unknown_options": True}, +) +@click.option( + "--include", + is_flag=False, + nargs=1, + help="Apps/Tables to include in the upgrade process. Requires comma-seperated values with no space in-between.", +) +@click.option( + "--exclude", + is_flag=False, + nargs=1, + help="Apps/Tables to exclude from the upgrade process. Requires comma-seperated values with no space in-between.", +) +@click.option( + "--database", + is_flag=False, + nargs=1, + default="{{ OPENEDX_MYSQL_DATABASE }}", + show_default=True, + required=True, + type=str, + help="The database of which the tables are to be upgraded", +) +@click.option("-I", "--non-interactive", is_flag=True, help="Run non-interactively") +def convert_mysql_utf8mb4_charset( + include: str, + exclude: str, + database: str, + non_interactive: bool, +) -> t.Iterable[tuple[str, str]]: + """ + Do command to upgrade the charset and collation of tables in MySQL + + Can specify whether to upgrade all tables, or include certain tables/apps or to exclude certain tables/apps + """ + + context = click.get_current_context().obj + config = tutor_config.load(context.root) + + if not config["RUN_MYSQL"]: + fmt.echo_info( + f"You are not running MySQL (RUN_MYSQL=false). It is your " + f"responsibility to upgrade the charset and collation of your MySQL instance." + ) + return + + # Prompt user for confirmation of upgrading all tables + if not include and not exclude and not non_interactive: + upgrade_all_tables = click.confirm( + "Are you sure you want to upgrade all tables? 
This process is potentially irreversible and may take a long time.", + prompt_suffix=" ", + ) + if not upgrade_all_tables: + return + + charset_to_upgrade_from = "utf8mb3" + charset = "utf8mb4" + collation = "utf8mb4_unicode_ci" + + query_to_append = "" + if include or exclude: + + def generate_query_to_append(tables: list[str], exclude: bool = False) -> str: + include = "NOT" if exclude else "" + table_names = f"^{tables[0]}" + for i in range(1, len(tables)): + table_names += f"|^{tables[i]}" + # We use regexp for pattern matching the names from the start of the tablename + query_to_append = f"AND table_name {include} regexp '{table_names}' " + return query_to_append + + query_to_append += ( + generate_query_to_append(include.split(",")) if include else "" + ) + query_to_append += ( + generate_query_to_append(exclude.split(","), exclude=True) + if exclude + else "" + ) + click.echo( + fmt.title( + f"Updating charset and collation of tables in the {database} database to {charset} and {collation} respectively." 
+ ) + ) + query = get_mysql_change_charset_query( + database, charset, collation, query_to_append, charset_to_upgrade_from + ) + click.echo(fmt.info(query)) + mysql_command = ( + "mysql --user={{ MYSQL_ROOT_USERNAME }} --password={{ MYSQL_ROOT_PASSWORD }} --host={{ MYSQL_HOST }} --port={{ MYSQL_PORT }} --skip-column-names --silent " + + shlex.join([f"--database={database}", "-e", query]) + ) + yield ("lms", mysql_command) + click.echo(fmt.info(f"MySQL charset and collation successfully upgraded")) + + def add_job_commands(do_command_group: click.Group) -> None: """ This is meant to be called with the `local/dev/k8s do` group commands, to add the @@ -390,6 +493,7 @@ def do_callback(service_commands: t.Iterable[tuple[str, str]]) -> None: hooks.Filters.CLI_DO_COMMANDS.add_items( [ + convert_mysql_utf8mb4_charset, createuser, importdemocourse, importdemolibraries, diff --git a/tutor/commands/upgrade/compose.py b/tutor/commands/upgrade/compose.py index df54c71d8b..111b630316 100644 --- a/tutor/commands/upgrade/compose.py +++ b/tutor/commands/upgrade/compose.py @@ -164,6 +164,12 @@ def upgrade_from_quince(context: click.Context, config: Config) -> None: upgrade_mongodb(context, config, "5.0.26", "5.0") upgrade_mongodb(context, config, "6.0.14", "6.0") upgrade_mongodb(context, config, "7.0.7", "7.0") + click.echo( + fmt.alert( + """It is strongly recommended to upgrade your character set and collation of the MySQL database after upgrading to Redwood. +You can use the convert-mysql-utf8mb4-charset do job to upgrade the collation and character set. 
You can find more details regarding the command at https://docs.tutor.edly.io/local.html#changing-the-mysql-charset-and-collation""" + ) + ) def upgrade_mongodb( diff --git a/tutor/utils.py b/tutor/utils.py index cdb082f38a..0eab6394c8 100644 --- a/tutor/utils.py +++ b/tutor/utils.py @@ -366,3 +366,130 @@ def format_table(rows: List[Tuple[str, ...]], separator: str = "\t") -> str: # Append EOL at all lines but the last one formatted += "\n" return formatted + + +def get_mysql_change_charset_query( + database: str, + charset: str, + collation: str, + query_to_append: str, + charset_to_upgrade_from: str, +) -> str: + """ + Helper function to generate the mysql query to upgrade the charset and collation of tables + + Utilized in the `tutor local do convert-mysql-utf8mb4-charset` command + """ + return f""" + + DROP PROCEDURE IF EXISTS UpdateColumns; + DELIMITER $$ + + CREATE PROCEDURE UpdateColumns() + BEGIN + + DECLARE done_columns_loop INT DEFAULT FALSE; + DECLARE _table_name VARCHAR(255); + DECLARE _table_name_copy VARCHAR(255) DEFAULT ""; + DECLARE _column_name VARCHAR(255); + DECLARE _column_type VARCHAR(255); + DECLARE _collation_name VARCHAR(255); + + # We explicitly upgrade the utf8mb3_general_ci collations to utf8mb4_unicode_ci + # The other collations are upgraded from utf8mb3_* to utf8mb4_* + # For any other collation, we leave it as it is + DECLARE columns_cur CURSOR FOR + SELECT + TABLE_NAME, + COLUMN_NAME, + COLUMN_TYPE, + CASE + WHEN COLLATION_NAME LIKE CONCAT('{charset_to_upgrade_from}', '_general_ci') THEN 'utf8mb4_unicode_ci' + WHEN COLLATION_NAME LIKE CONCAT('{charset_to_upgrade_from}', '_%') THEN CONCAT('{charset}', SUBSTRING_INDEX(COLLATION_NAME, '{charset_to_upgrade_from}', -1)) + ELSE COLLATION_NAME + END AS COLLATION_NAME + FROM + INFORMATION_SCHEMA.COLUMNS + WHERE + TABLE_SCHEMA = '{database}' + AND COLLATION_NAME IS NOT NULL {query_to_append}; + DECLARE CONTINUE HANDLER FOR NOT FOUND SET done_columns_loop = TRUE; + OPEN columns_cur; + 
columns_loop: LOOP + FETCH columns_cur INTO _table_name, _column_name, _column_type, _collation_name; + + IF done_columns_loop THEN + LEAVE columns_loop; + END IF; + + # First, upgrade the default charset and collation of the table + If _table_name <> _table_name_copy THEN + select _table_name; + SET FOREIGN_KEY_CHECKS = 0; + SET @stmt = CONCAT('ALTER TABLE `', _table_name, '` CONVERT TO CHARACTER SET {charset} COLLATE {collation};'); + PREPARE query FROM @stmt; + EXECUTE query; + DEALLOCATE PREPARE query; + SET FOREIGN_KEY_CHECKS = 1; + SET _table_name_copy = _table_name; + END IF; + + # Then, upgrade the default charset and collation of each column + # This sequence of table -> column is necessary to preserve column defaults + SET FOREIGN_KEY_CHECKS = 0; + SET @statement = CONCAT('ALTER TABLE `', _table_name, '` MODIFY `', _column_name, '` ', _column_type,' CHARACTER SET {charset} COLLATE ', _collation_name, ';'); + PREPARE query FROM @statement; + EXECUTE query; + DEALLOCATE PREPARE query; + SET FOREIGN_KEY_CHECKS = 1; + + END LOOP; + CLOSE columns_cur; + + END$$ + + DELIMITER ; + + DROP PROCEDURE IF EXISTS UpdateTables; + DELIMITER $$ + + CREATE PROCEDURE UpdateTables() + # To upgrade the default character set and collation of any tables that were skipped from the previous procedure + BEGIN + + DECLARE done INT DEFAULT FALSE; + DECLARE table_name_ VARCHAR(255); + DECLARE cur CURSOR FOR + SELECT table_name FROM information_schema.tables + WHERE table_schema = '{database}' AND table_type = "BASE TABLE" AND table_collation not like 'utf8mb4_%' {query_to_append}; + DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; + + OPEN cur; + tables_loop: LOOP + FETCH cur INTO table_name_; + + IF done THEN + LEAVE tables_loop; + END IF; + + select table_name_; + + SET FOREIGN_KEY_CHECKS = 0; + SET @stmt = CONCAT('ALTER TABLE `', table_name_, '` CONVERT TO CHARACTER SET {charset} COLLATE {collation};'); + PREPARE query FROM @stmt; + EXECUTE query; + DEALLOCATE PREPARE 
query; + + SET FOREIGN_KEY_CHECKS = 1; + + END LOOP; + CLOSE cur; + + END$$ + DELIMITER ; + + use {database}; + ALTER DATABASE {database} CHARACTER SET {charset} COLLATE {collation}; + CALL UpdateColumns(); + CALL UpdateTables(); + """