Skip to content

Commit

Permalink
Merge pull request #357 from edx/cdyer/xml-string-field
Browse files Browse the repository at this point in the history
Add XMLString field type which validates input.
  • Loading branch information
Cliff Dyer authored Dec 15, 2016
2 parents 01d0e27 + 17f26bc commit 6fe0a6d
Show file tree
Hide file tree
Showing 9 changed files with 125 additions and 32 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
*.pyc
local_settings.py
*.egg-info
.treerc
.coverage
.tox
.treerc
htmlcov
*~

Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include xblock/VERSION.txt
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pyyaml
lxml
webob>=1.6.0
simplejson
six
pytz
python-dateutil
markupsafe
Expand Down
4 changes: 2 additions & 2 deletions script/max_pylint_violations
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/bash
DEFAULT_MAX=11
DEFAULT_MAX=8

pylint xblock | tee /tmp/pylint-xblock.log
ERR=`grep -E "^[C|R|W|E]:" /tmp/pylint-xblock.log | wc -l`
MAX=${1-$DEFAULT_MAX}
if [ $ERR -ge $MAX ]; then
if [ $ERR -gt $MAX ]; then
echo "too many pylint violations: $ERR (max is $MAX)"
exit 1
else
Expand Down
16 changes: 13 additions & 3 deletions setup.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
"""Set up for XBlock"""
#!/usr/bin/env python

"""
Set up for XBlock
"""

import os.path
from setuptools import setup

version_file = os.path.join(os.path.dirname(__file__), 'xblock/VERSION.txt')

setup(
name='XBlock',
version='0.4.12',
version=open(version_file).read().strip(),
description='XBlock Core Library',
packages=[
'xblock',
Expand All @@ -12,14 +20,16 @@
'xblock.test',
'xblock.test.django',
],
include_package_data=True,
install_requires=[
'fs',
'lxml',
'markupsafe',
'python-dateutil',
'pytz',
'pyyaml',
'six',
'webob',
'fs',
],
extras_require={
'django': ['django-pyfs']
Expand Down
1 change: 1 addition & 0 deletions xblock/VERSION.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.4.13
9 changes: 6 additions & 3 deletions xblock/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
XBlock Courseware Components
"""

# For backwards compatibility, provide the XBlockMixin in xblock.fields
# without causing a circular import
import os
import warnings
import xblock.core
import xblock.fields
Expand All @@ -20,6 +19,10 @@ def __init__(self, *args, **kwargs):
super(XBlockMixin, self).__init__(*args, **kwargs)


# For backwards compatibility, provide the XBlockMixin in xblock.fields
# without causing a circular import
xblock.fields.XBlockMixin = XBlockMixin

__version__ = "0.4.7"
# Path of the text file, located next to this module, that holds the version.
VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION.txt')

# Read the version through a context manager so the file handle is closed
# deterministically instead of being left for the garbage collector.
with open(VERSION_FILE) as _version_file:
    __version__ = _version_file.read().strip()
61 changes: 51 additions & 10 deletions xblock/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,26 @@
from collections import namedtuple
import copy
import datetime
import dateutil.parser
import hashlib
import itertools
import pytz
import json
import traceback
import unicodedata
import warnings
import json

import dateutil.parser
from lxml import etree
import pytz
import six
import yaml
import unicodedata

from xblock.internal import Nameable


# __all__ controls what classes end up in the docs, and in what order.
__all__ = [
'BlockScope', 'UserScope', 'Scope', 'ScopeIds',
'Field',
'Boolean', 'Dict', 'Float', 'Integer', 'List', 'Set', 'String',
'Boolean', 'Dict', 'Float', 'Integer', 'List', 'Set', 'String', 'XMLString',
'XBlockMixin',
]

Expand Down Expand Up @@ -832,17 +834,30 @@ class String(JSONField):
"""
MUTABLE = False
VALID_CONTROLS = {u'\n', u'\r', u'\t'}

def _valid_unichar(self, character):
    """
    Tell whether a single unicode character may be kept.

    Returns True when the character's Unicode general category is not one
    of the "C" categories, or when the character is listed in
    ``self.VALID_CONTROLS``; returns False otherwise.
    """
    is_control = unicodedata.category(character).startswith(u'C')
    return not is_control or character in self.VALID_CONTROLS

def _valid_bytechar(self, character):
    """
    Tell whether a single element of a bytestring may be kept.

    ``character`` may be a length-1 byte string (what iterating a
    bytestring yields on Python 2) or an integer byte value (what
    iterating a ``bytes`` object yields on Python 3).

    Returns True when the byte value is 32 or greater, or when the
    corresponding character is listed in ``self.VALID_CONTROLS``;
    returns False otherwise.
    """
    # ord() rejects ints, so only call it on the length-1 string form.
    byte_value = character if isinstance(character, int) else ord(character)
    if byte_value >= 32:
        return True
    # Values below 32 are plain ASCII, so chr() gives the matching character.
    return chr(byte_value) in self.VALID_CONTROLS

def _sanitize(self, value):
"""
Remove the control characters that are not allowed in XML:
https://www.w3.org/TR/xml/#charsets
Leave all other characters.
"""
if isinstance(value, unicode):
new_value = u''.join(ch for ch in value if unicodedata.category(ch)[0] != u'C' or ch in (u'\n', u'\r', u'\t'))
elif isinstance(value, str):
new_value = ''.join(ch for ch in value if ord(ch) >= 32 or ch in ('\n', '\r', '\t'))
if isinstance(value, six.text_type):
new_value = u''.join(ch for ch in value if self._valid_unichar(ch))
elif isinstance(value, six.binary_type):
new_value = b''.join(ch for ch in value if self._valid_bytechar(ch))
else:
return value
# The new string will be equivalent to the original string if no control characters are present.
Expand Down Expand Up @@ -871,6 +886,32 @@ def none_to_xml(self):
enforce_type = from_json


class XMLString(String):
    """
    A String field whose value is expected to be an XML document.

    A value is either None or a string. Non-None values are checked by
    parsing them with ``lxml.etree.XML``; a parse failure surfaces as
    ``lxml.etree.XMLSyntaxError``.
    """

    def to_json(self, value):
        """
        Serialize ``value``, validating it first when type enforcement is on.

        When ``self._enable_enforce_type`` is set, the value is passed through
        ``enforce_type`` (which may raise ``lxml.etree.XMLSyntaxError``) before
        being handed to the parent class's ``to_json``.
        """
        checked = self.enforce_type(value) if self._enable_enforce_type else value
        return super(XMLString, self).to_json(checked)

    def enforce_type(self, value):
        """
        Return ``value`` unchanged after confirming it parses as XML.

        None is returned as-is without parsing. Any other value is parsed
        with ``etree.XML`` purely for validation; the parsed tree is discarded.
        """
        if value is None:
            return value
        etree.XML(value)
        return value


class DateTime(JSONField):
"""
A field for representing a datetime.
Expand Down
61 changes: 48 additions & 13 deletions xblock/test/test_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,31 @@
Tests for classes extending Field.
"""

# Allow accessing protected members for testing purposes
# pylint: disable=W0212

from mock import Mock
import unittest
# pylint: disable=abstract-class-instantiated, protected-access

from contextlib import contextmanager
import datetime as dt
import pytz
import warnings
import itertools
import math
import textwrap
import itertools
from contextlib import contextmanager
import unittest
import warnings

import ddt
from lxml import etree
from mock import Mock
import pytz

from xblock.core import XBlock, Scope
from xblock.field_data import DictFieldData
from xblock.fields import (
Any, Boolean, Dict, Field, Float,
Integer, List, Set, String, DateTime, Reference, ReferenceList, Sentinel,
UNIQUE_ID
Any, Boolean, Dict, Field, Float, Integer, List, Set, String, XMLString, DateTime, Reference, ReferenceList,
ScopeIds, Sentinel, UNIQUE_ID, scope_key,
)

from xblock.test.tools import (
assert_equals, assert_not_equals, assert_in, assert_not_in, assert_false, TestRuntime
)
from xblock.fields import scope_key, ScopeIds


class FieldTest(unittest.TestCase):
Expand Down Expand Up @@ -244,6 +241,44 @@ def test_control_characters_filtered(self):
self.assertJSONOrSetGetEquals(u'\n\r\t', u'\n\v\r\b\t')


@ddt.ddt
class XMLStringTest(FieldTest):
    """
    Checks how the XMLString field serializes well-formed and malformed input.
    """
    FIELD_TO_TEST = XMLString

    @ddt.data(
        u'<abc>Hello</abc>',
        u'<abc attr="yes">Hello</abc>',
        u'<xml/>',
        '<bytes/>',
        '<unicode>\xc8\x88</unicode>',
        None
    )
    def test_json_equals(self, input_text):
        # With enforcement on, each of these inputs must serialize to itself.
        strict_field = self.FIELD_TO_TEST(enforce_type=True)
        serialized = strict_field.to_json(input_text)
        self.assertEqual(serialized, input_text)

    @ddt.data(
        'text',
        '<xml',
        '<xml attr=3/>',
        '<xml attr="3/>',
        '<open>',
        '<open>with text',
        '<xml/>trailing text',
        '<open>text</close>',
        '<invalid_utf8 char="\x9e"/>',
    )
    def test_bad_xml(self, input_text):
        # pylint: disable=no-member
        # An enforcing field must reject the malformed input...
        strict_field = self.FIELD_TO_TEST(enforce_type=True)
        with self.assertRaises(etree.XMLSyntaxError):
            strict_field.to_json(input_text)
        # ...while a non-enforcing field serializes it untouched.
        lenient_field = self.FIELD_TO_TEST(enforce_type=False)
        self.assertEqual(lenient_field.to_json(input_text), input_text)


@ddt.ddt
class DateTest(FieldTest):
"""
Expand Down

0 comments on commit 6fe0a6d

Please sign in to comment.