Skip to content

Commit

Permalink
feat: add do command to upgrade the charset and collation of MySQL da…
Browse files Browse the repository at this point in the history
…tabase
  • Loading branch information
Danyal-Faheem committed Jun 14, 2024
1 parent c7b4327 commit 8a279f0
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- 💥[Feature] Upgrade default charset and collation of mysql to utf8mb4 and utf8mb4_unicode_ci respectively (by @Danyal-Faheem)
- Add do command to upgrade the charset and collation of tables in mysql.
- The command will perform the following upgrades:
- Upgrade all `utf8mb3` charset to `utf8mb4`
- Upgrade collation `utf8mb3_general_ci` to `utf8mb4_unicode_ci`
- Upgrade collation `utf8mb3_bin` to `utf8mb4_bin`
- Upgrade collation `utf8mb3_*` to `utf8mb4_*`
27 changes: 27 additions & 0 deletions docs/local.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,33 @@ The default Open edX theme is rather bland, so Tutor makes it easy to switch to

Out of the box, only the default "open-edx" theme is available. We also developed `Indigo, a beautiful, customizable theme <https://github.com/overhangio/indigo>`__ which is easy to install with Tutor.

Changing the mysql charset and collation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. note:: This command is only for users upgrading from quince.

Your database's charset and collation might not support specific characters or emojis. Tutor will run fine without this change unless you explicitly use such characters in your instance.

.. warning:: This change is potentially irreversible. It is recommended to make a backup of the MySQL database. See the :ref:`database dump instructions <database_dumps>` to create a DB dump.

To change the charset and collation of all the tables in the openedx database, run::

tutor local do convert-mysql-utf8mb4-charset

Alternatively, if you only want to change the charset and collation of certain tables or exclude certain tables, you can use the ``--include`` or ``--exclude`` options. These options take comma-separated names of tables/apps with no spaces in between. To upgrade the ``courseware_studentmodule`` and ``courseware_studentmodulehistory`` tables, run::

tutor local do convert-mysql-utf8mb4-charset --include=courseware_studentmodule,courseware_studentmodulehistory

Tutor performs pattern matching from the start of the table name so you can just enter the name of the app to include/exclude all the tables under that app. To upgrade all the tables in the database except the ones under the student and wiki apps, run::

tutor local do convert-mysql-utf8mb4-charset --exclude=student,wiki

In the above command, all the tables whose name starts with either student or wiki will be excluded from the upgrade process.

By default, only the tables in the openedx database are changed. If you are running any plugins with their own databases, you can upgrade them by utilizing the ``--database`` option. To upgrade all the tables in the discovery database, run::

tutor local do convert-mysql-utf8mb4-charset --database=discovery

Running arbitrary ``manage.py`` commands
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 2 additions & 0 deletions docs/tutorials/datamigration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ With Tutor, all data are stored in a single folder. This means that it's extreme

tutor local start -d

.. _database_dumps:

Making database dumps
---------------------

Expand Down
75 changes: 75 additions & 0 deletions tests/commands/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,78 @@ def test_set_theme(self) -> None:
self.assertIn("lms-job", dc_args)
self.assertIn("assign_theme('beautiful', 'domain1')", dc_args[-1])
self.assertIn("assign_theme('beautiful', 'domain2')", dc_args[-1])

def test_convert_mysql_utf8mb4_charset_all_tables(self) -> None:
    """Run the job without table filters and inspect the rendered command."""
    cli_args = [
        "local",
        "do",
        "convert-mysql-utf8mb4-charset",
        "--non-interactive",
    ]
    with temporary_root() as root:
        self.invoke_in_root(root, ["config", "save"])
        with patch("tutor.utils.docker_compose") as mock_docker_compose:
            result = self.invoke_in_root(root, cli_args)
            # Positional arguments of the last docker compose invocation.
            dc_args = mock_docker_compose.call_args[0]

    self.assertIsNone(result.exception)
    self.assertEqual(0, result.exit_code)
    self.assertIn("lms-job", dc_args)
    rendered_command = dc_args[-1]
    for expected in ("utf8mb4", "openedx", "utf8mb4_unicode_ci"):
        self.assertIn(expected, rendered_command)
    # No --include/--exclude was given, so no table-name filter is expected.
    self.assertNotIn("regexp", rendered_command)

def test_convert_mysql_utf8mb4_charset_include_tables(self) -> None:
    """Run the job with ``--include`` and check the filter reaches the command."""
    cli_args = [
        "local",
        "do",
        "convert-mysql-utf8mb4-charset",
        "--include=courseware_studentmodule,xblock",
    ]
    with temporary_root() as root:
        self.invoke_in_root(root, ["config", "save"])
        with patch("tutor.utils.docker_compose") as mock_docker_compose:
            result = self.invoke_in_root(root, cli_args)
            # Positional arguments of the last docker compose invocation.
            dc_args = mock_docker_compose.call_args[0]

    self.assertIsNone(result.exception)
    self.assertEqual(0, result.exit_code)
    self.assertIn("lms-job", dc_args)
    rendered_command = dc_args[-1]
    for expected in (
        "openedx",
        "utf8mb4",
        "utf8mb4_unicode_ci",
        "regexp",
        "courseware_studentmodule",
        "xblock",
    ):
        self.assertIn(expected, rendered_command)

def test_convert_mysql_utf8mb4_charset_exclude_tables(self) -> None:
    """Run the job with ``--database`` and ``--exclude`` and check the command."""
    cli_args = [
        "local",
        "do",
        "convert-mysql-utf8mb4-charset",
        "--database=discovery",
        "--exclude=course,auth",
    ]
    with temporary_root() as root:
        self.invoke_in_root(root, ["config", "save"])
        with patch("tutor.utils.docker_compose") as mock_docker_compose:
            result = self.invoke_in_root(root, cli_args)
            # Positional arguments of the last docker compose invocation.
            dc_args = mock_docker_compose.call_args[0]

    self.assertIsNone(result.exception)
    self.assertEqual(0, result.exit_code)
    self.assertIn("lms-job", dc_args)
    rendered_command = dc_args[-1]
    for expected in (
        "utf8mb4",
        "utf8mb4_unicode_ci",
        "discovery",
        "regexp",
        "NOT",
        "course",
        "auth",
    ):
        self.assertIn(expected, rendered_command)
67 changes: 66 additions & 1 deletion tutor/commands/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from tutor import config as tutor_config
from tutor import env, fmt, hooks
from tutor.utils import get_mysql_change_charset_query
from tutor.hooks import priorities


Expand Down Expand Up @@ -308,12 +309,75 @@ def sqlshell(args: list[str]) -> t.Iterable[tuple[str, str]]:
Extra arguments will be passed to the `mysql` command verbatim. For instance, to
show tables from the "openedx" database, run `do sqlshell openedx -e 'show tables'`.
"""
command = "mysql --user={{ MYSQL_ROOT_USERNAME }} --password={{ MYSQL_ROOT_PASSWORD }} --host={{ MYSQL_HOST }} --port={{ MYSQL_PORT }} --default-character-set=utf8mb3"
command = "mysql --user={{ MYSQL_ROOT_USERNAME }} --password={{ MYSQL_ROOT_PASSWORD }} --host={{ MYSQL_HOST }} --port={{ MYSQL_PORT }} --default-character-set=utf8mb4"
if args:
command += " " + shlex.join(args) # pylint: disable=protected-access
yield ("lms", command)



def _build_table_filter_clause(tables: list[str], exclude: bool = False) -> str:
    """Return an ``AND table_name [NOT] regexp ...`` clause matching table-name prefixes."""
    negation = "NOT" if exclude else ""
    # Anchor every name with ^ so that matching happens from the start of the
    # table name; an app name therefore selects all tables with that prefix.
    pattern = "|".join(f"^{name}" for name in tables)
    return f"AND table_name {negation} regexp '{pattern}' "


@click.command(
    short_help="Convert the charset and collation of mysql to utf8mb4.",
    help=(
        "Convert the charset and collation of mysql to utf8mb4. You can either upgrade all tables, specify only certain tables to upgrade or specify certain tables to exclude from the upgrade process"
    ),
    context_settings={"ignore_unknown_options": True},
)
@click.option(
    "--include",
    is_flag=False,
    nargs=1,
    help="Apps/Tables to include in the upgrade process. Requires comma-seperated values with no space in-between.",
)
@click.option(
    "--exclude",
    is_flag=False,
    nargs=1,
    help="Apps/Tables to exclude from the upgrade process. Requires comma-seperated values with no space in-between.",
)
@click.option(
    "--database",
    is_flag=False,
    nargs=1,
    default="{{ OPENEDX_MYSQL_DATABASE }}",
    show_default=True,
    required=True,
    type=str,
    help="The database of which the tables are to be upgraded",
)
@click.option("-I", "--non-interactive", is_flag=True, help="Run non-interactively")
@click.pass_context
def convert_mysql_utf8mb4_charset(
    context: click.Context,
    include: t.Optional[str],
    exclude: t.Optional[str],
    database: str,
    non_interactive: bool,
) -> t.Iterable[tuple[str, str]]:
    """
    Do command to upgrade the charset and collation of tables in MySQL.

    All tables are upgraded unless ``include`` and/or ``exclude`` is given; each
    is a comma-separated list of table-name prefixes (table or app names).

    Yields a single ``("lms", <mysql command>)`` pair. Nothing is yielded when
    the user declines the confirmation prompt or when ``RUN_MYSQL`` is false.
    """
    # Prompt user for confirmation of upgrading all tables
    if not include and not exclude and not non_interactive:
        if not click.confirm(
            "Are you sure you want to upgrade all tables? This process is potentially irreversible and may take a long time.",
            prompt_suffix=" ",
        ):
            return

    # Defined before the RUN_MYSQL check because the message below interpolates
    # them; assigning them afterwards raised UnboundLocalError on that path.
    charset_to_upgrade_from = "utf8mb3"
    charset = "utf8mb4"
    collation = "utf8mb4_unicode_ci"

    # `context` comes from @click.pass_context; its `obj` carries the project root.
    config = tutor_config.load(context.obj.root)
    if not config["RUN_MYSQL"]:
        fmt.echo_info(
            f"You are not running MySQL (RUN_MYSQL=false). It is your "
            f"responsibility to update your MySQL instance to {charset} charset and {collation} collation."
        )
        return

    query_to_append = ""
    if include:
        query_to_append += _build_table_filter_clause(include.split(","))
    if exclude:
        query_to_append += _build_table_filter_clause(exclude.split(","), exclude=True)

    click.echo(
        fmt.title(
            f"Updating charset and collation of tables in the {database} database to {charset} and {collation} respectively."
        )
    )
    query = get_mysql_change_charset_query(
        database, charset, collation, query_to_append, charset_to_upgrade_from
    )
    click.echo(fmt.info(query))
    mysql_command = (
        "mysql --user={{ MYSQL_ROOT_USERNAME }} --password={{ MYSQL_ROOT_PASSWORD }} --host={{ MYSQL_HOST }} --port={{ MYSQL_PORT }} --skip-column-names --silent "
        + shlex.join([f"--database={database}", "-e", query])
    )
    yield ("lms", mysql_command)
    # Runs only once the consumer resumes the generator after the yield above.
    click.echo(fmt.info("MySQL charset and collation successfully upgraded"))


def add_job_commands(do_command_group: click.Group) -> None:
"""
This is meant to be called with the `local/dev/k8s do` group commands, to add the
Expand Down Expand Up @@ -389,6 +453,7 @@ def do_callback(service_commands: t.Iterable[tuple[str, str]]) -> None:

hooks.Filters.CLI_DO_COMMANDS.add_items(
[
convert_mysql_utf8mb4_charset,
createuser,
importdemocourse,
importdemolibraries,
Expand Down
2 changes: 2 additions & 0 deletions tutor/commands/upgrade/compose.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ def upgrade_from_quince(context: click.Context, config: Config) -> None:
upgrade_mongodb(context, config, "5.0.26", "5.0")
upgrade_mongodb(context, config, "6.0.14", "6.0")
upgrade_mongodb(context, config, "7.0.7", "7.0")
click.echo(fmt.alert("""It is strongly recommended to upgrade your character set and collation of the MySQL database after upgrading to Redwood.
You can use the convert-mysql-utf8mb4-charset do job to upgrade the collation and character set. You can find more details regarding the command at https://docs.tutor.edly.io/local.html#changing-the-mysql-charset-and-collation"""))


def upgrade_mongodb(
Expand Down
65 changes: 65 additions & 0 deletions tutor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,68 @@ def format_table(rows: List[Tuple[str, ...]], separator: str = "\t") -> str:
# Append EOL at all lines but the last one
formatted += "\n"
return formatted


def get_mysql_change_charset_query(
    database: str,
    charset: str,
    collation: str,
    query_to_append: str,
    charset_to_upgrade_from: str,
) -> str:
    """
    Helper function to generate the mysql query to upgrade the charset and collation of tables
    Utilized in the `tutor local do convert-mysql-utf8mb4-charset` command

    The returned script (re)creates an ``UpdateTables`` stored procedure, changes
    the default charset/collation of ``database`` and then calls the procedure.
    The procedure iterates over the columns of ``database`` whose character set
    is ``charset_to_upgrade_from`` and issues one ``ALTER TABLE ... MODIFY`` per
    column.

    :param database: name of the schema whose columns are converted
    :param charset: target character set
    :param collation: target collation, used for the database default and for
        columns whose current collation ends in ``_general_ci``
    :param query_to_append: extra SQL appended verbatim to the column-selection
        ``WHERE`` clause (may be empty)
    :param charset_to_upgrade_from: only columns using this character set are
        selected
    :return: the SQL script as a string

    All arguments are interpolated into the SQL without escaping.

    NOTE(review): the generated MODIFY clause re-specifies only the column type,
    charset and collation -- confirm whether other column attributes (NULL /
    DEFAULT) need to be carried over as well.
    """
    return f"""
DROP PROCEDURE IF EXISTS UpdateTables;
DELIMITER $$
CREATE PROCEDURE UpdateTables()
BEGIN
DECLARE done_tables_loop INT DEFAULT FALSE;
DECLARE _table_name VARCHAR(255);
DECLARE _column_name VARCHAR(255);
DECLARE _column_type VARCHAR(255);
DECLARE _collation_name VARCHAR(255);
# We only upgrade columns with charset_to_upgrade_from(utf8mb3) charset for now
# This is done so that we do not upgrade columns that have been explicitly set to utf8mb4
# We also explicitly upgrade the utf8mb3_general_ci collations to utf8mb4_unicode_ci
# The other collations are upgraded from utf8mb3_* to utf8mb4_*
DECLARE columns_cur CURSOR FOR
SELECT
table_name,
column_name,
column_type,
CONCAT('{charset}', IF(STRCMP(substring_index(collation_name, '{charset_to_upgrade_from}', -1), '_general_ci') = 0, substring_index('{collation}', '{charset}', -1), substring_index(collation_name, '{charset_to_upgrade_from}', -1))) as collation FROM information_schema.columns
WHERE table_schema = '{database}' AND character_set_name = '{charset_to_upgrade_from}' {query_to_append};
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done_tables_loop = TRUE;
OPEN columns_cur;
tables_loop: LOOP
FETCH columns_cur INTO _table_name, _column_name, _column_type, _collation_name;
IF done_tables_loop THEN
LEAVE tables_loop;
END IF;
SET FOREIGN_KEY_CHECKS = 0;
SET @statement = CONCAT('ALTER TABLE `', _table_name, '` MODIFY `', _column_name, '` ', _column_type,' CHARACTER SET {charset} COLLATE ', _collation_name, ';');
PREPARE query FROM @statement;
EXECUTE query;
DEALLOCATE PREPARE query;
SET FOREIGN_KEY_CHECKS = 1;
END LOOP;
CLOSE columns_cur;
END$$
DELIMITER ;
use {database};
ALTER DATABASE {database} CHARACTER SET {charset} COLLATE {collation};
CALL UpdateTables();
"""

0 comments on commit 8a279f0

Please sign in to comment.