Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Source specifying for relative links is allowed #232

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ The link map file can be supplied using `-f` or `--link_file`::

cc2olx -r zip -i <IMSCC_FILE> -f <CSV_FILE>

If the original course content contains relative links and the resources
(images, documents etc) the links point to are not included into the exported
course dump, you can specify their source using `-s` flag:

cc2olx -i <IMSCC_FILE> -s <RELATIVE_LINKS_SOURCE>

Test Data
---------
Expand Down
6 changes: 6 additions & 0 deletions src/cc2olx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,10 @@ def parse_args(args=None):
"should contain consumer_id, consumer_key and consumer_secret."
),
)
parser.add_argument(
"-s",
"--relative_links_source",
nargs="?",
help="The relative links source in the format '<scheme>://<netloc>', e.g. 'https://example.com'",
)
return parser.parse_args(args)
2 changes: 2 additions & 0 deletions src/cc2olx/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
OLX_STATIC_DIR = "static"
OLX_STATIC_PATH_TEMPLATE = f"/{OLX_STATIC_DIR}/{{static_filename}}"
23 changes: 19 additions & 4 deletions src/cc2olx/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@
from cc2olx import filesystem
from cc2olx import olx
from cc2olx.cli import parse_args, RESULT_TYPE_FOLDER, RESULT_TYPE_ZIP
from cc2olx.models import Cartridge, OLX_STATIC_DIR
from cc2olx.constants import OLX_STATIC_DIR
from cc2olx.models import Cartridge
from cc2olx.settings import collect_settings


def convert_one_file(input_file, workspace, link_file=None, passport_file=None):
def convert_one_file(
input_file,
workspace,
link_file=None,
passport_file=None,
relative_links_source=None,
):
filesystem.create_directory(workspace)

cartridge = Cartridge(input_file, workspace)
cartridge.load_manifest_extracted()
cartridge.normalize()

olx_export = olx.OlxExport(cartridge, link_file, passport_file)
olx_export = olx.OlxExport(cartridge, link_file, passport_file, relative_links_source)
olx_filename = cartridge.directory.parent / (cartridge.directory.name + "-course.xml")
policy_filename = cartridge.directory.parent / "policy.json"

Expand Down Expand Up @@ -53,6 +60,7 @@ def main():
workspace = settings["workspace"]
link_file = settings["link_file"]
passport_file = settings["passport_file"]
relative_links_source = settings["relative_links_source"]

# setup logger
logging_config = settings["logging_config"]
Expand All @@ -64,7 +72,14 @@ def main():

for input_file in settings["input_files"]:
try:
convert_one_file(input_file, temp_workspace, link_file, passport_file)
convert_one_file(
input_file,
temp_workspace,
link_file,
passport_file,
relative_links_source,
)

except Exception:
logger.exception("Error while converting %s file", input_file)

Expand Down
5 changes: 3 additions & 2 deletions src/cc2olx/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import zipfile

from cc2olx import filesystem
from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE
from cc2olx.external.canvas import ModuleMeta
from cc2olx.qti import QtiParser
from cc2olx.utils import clean_file_name
Expand Down Expand Up @@ -343,7 +344,7 @@ def get_resource_content(self, identifier):
return "html", {"html": html}
elif "web_resources" in str(res_filename) and imghdr.what(str(res_filename)):
static_filename = str(res_filename).split("web_resources/")[1]
olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, static_filename)
olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_filename)
html = (
'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
'</head><body><p><img src="{}" alt="{}"></p></body></html>'.format(olx_static_path, static_filename)
Expand All @@ -353,7 +354,7 @@ def get_resource_content(self, identifier):
# This webcontent is outside of ``web_resources`` directory
# So we need to manually copy it to OLX_STATIC_DIR
self.extra_static_files.append(res_relative_path)
olx_static_path = "/{}/{}".format(OLX_STATIC_DIR, res_relative_path)
olx_static_path = OLX_STATIC_PATH_TEMPLATE.format(static_filename=res_relative_path)
html = (
'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
'</head><body><p><a href="{}" alt="{}">{}<a></p></body></html>'.format(
Expand Down
18 changes: 17 additions & 1 deletion src/cc2olx/olx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from lxml import html
from cc2olx.iframe_link_parser import KalturaIframeLinkParser

from cc2olx.constants import OLX_STATIC_PATH_TEMPLATE
from cc2olx.qti import QtiExport
from cc2olx.utils import element_builder, passport_file_parser

Expand Down Expand Up @@ -36,11 +37,12 @@ class OlxExport:
QTI = "qti"
DISCUSSION = "discussion"

def __init__(self, cartridge, link_file=None, passport_file=None):
def __init__(self, cartridge, link_file=None, passport_file=None, relative_links_source=None):
self.cartridge = cartridge
self.doc = None
self.link_file = link_file
self.passport_file = passport_file
self.relative_links_source = relative_links_source
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add some validation for the format of relative_links_source.

Copy link
Contributor Author

@myhailo-chernyshov-rg myhailo-chernyshov-rg Dec 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Validation is added on the argument parsing step.

self.iframe_link_parser = None
if link_file:
self.iframe_link_parser = KalturaIframeLinkParser(self.link_file)
Expand Down Expand Up @@ -250,6 +252,18 @@ def process_external_tools_link(item, html):
html = html.replace(item, external_tool_url)
return html

def process_extra_static_files(item, html):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a docstring here explaining what this function is for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if self.relative_links_source is None:
return html

for static_file in self.cartridge.extra_static_files:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How large can extra_static_files get? This looks like it could get pretty expensive to loop through every entity in this list for each item.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During the resource content collection, when the external file is found, it's added to this list. So, the list size isn't limited and depends on CC archive content. But we need to process each such link to make it absolute, so we do need to iterate over the list. I don't think it's too expensive because other exporter tasks require much more actions to perform. Do you have any ideas how we can handle all the links without iterating over them?

if item == OLX_STATIC_PATH_TEMPLATE.format(static_filename=static_file):
return html

url = urllib.parse.urljoin(self.relative_links_source, item)
html = html.replace(item, url)
return html

for _, item in items:
if "IMS-CC-FILEBASE" in item:
html = process_ims_cc_filebase(item, html)
Expand All @@ -259,6 +273,8 @@ def process_external_tools_link(item, html):
html = process_external_tools_link(item, html)
elif "CANVAS_OBJECT_REFERENCE" in item:
html = process_canvas_reference(item, html)
else:
html = process_extra_static_files(item, html)

return html

Expand Down
1 change: 1 addition & 0 deletions src/cc2olx/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,6 @@ def collect_settings(parsed_args):
"workspace": Path.cwd() / parsed_args.output,
"link_file": parsed_args.link_file,
"passport_file": parsed_args.passport_file,
"relative_links_source": parsed_args.relative_links_source,
}
return settings
Loading