Skip to content

Commit

Permalink
Force UTF-8
Browse files Browse the repository at this point in the history
Instead of relying on the default encoding (LANG, LC_*), force encoding of read and written files to UTF-8.  Also add some test cases for UTF-8 text.  Bump patch version.
  • Loading branch information
beorn committed May 12, 2018
1 parent 4863963 commit abcc866
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 26 deletions.
9 changes: 5 additions & 4 deletions icssplit.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
will split `mycal.ics` into outcal1.ics outcal2.ics outcal3.ics...
"""

__version__ = '1.0.0'
__version__ = '1.0.1'
__author__ = 'Bjorn Stabell <[email protected]>'
__all__ = []

Expand All @@ -26,11 +26,11 @@
logger = logging.getLogger(__name__)
log = logger.info


BEGIN_CALENDAR = 'BEGIN:VCALENDAR'
END_CALENDAR = 'END:VCALENDAR'
BEGIN_EVENT = 'BEGIN:VEVENT'
END_EVENT = 'END:VEVENT'
enc = {'encoding': 'utf8'} # don't rely on LANG, force encoding to UTF-8

def icssplit(src, maxsize):
"""\
Expand Down Expand Up @@ -93,11 +93,12 @@ def cli():
outfile_base = args['OUTFILE'] or infile
maxsize = int(args['--maxsize'] or 0) or 1024*1024*0.9


log(f"parsing {infile} and splitting into files of maxsize={maxsize}")
for (indx, outf) in enumerate(icssplit(open(infile, 'r'), maxsize)):
for (indx, outf) in enumerate(icssplit(open(infile, 'r', **enc), maxsize)):
outfile = f"{outfile_base}-{indx}.ics"
log(f"writing {outfile}")
with open(outfile, 'w') as fh:
with open(outfile, 'w', **enc) as fh:
fh.write(outf)

if __name__ == '__main__': cli()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setuptools.setup(
name="icssplit",
version="1.0.0",
version="1.0.1",
url="https://github.com/beorn/icssplit",
download_url = 'https://github.com/beorn/icssplit/archive/1.0.0.tar.gz',

Expand Down
34 changes: 17 additions & 17 deletions tests/test_icssplit.ics
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 2 æøå
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -123,7 +123,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 3
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -135,7 +135,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 4 你好
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -147,7 +147,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 5
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -159,7 +159,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 6
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -171,7 +171,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 7
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -183,7 +183,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 8
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -195,7 +195,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 9
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -207,7 +207,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 10
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -219,7 +219,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 11
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -231,7 +231,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 12
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -243,7 +243,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 13
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -255,7 +255,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 14
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -267,7 +267,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 15
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -279,7 +279,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 16
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -291,7 +291,7 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 17
END:VEVENT
BEGIN:VEVENT
DTSTART:20130801T030000Z
Expand All @@ -303,6 +303,6 @@ LAST-MODIFIED:20130801T025310Z
LOCATION:
SEQUENCE:0
STATUS:CONFIRMED
SUMMARY:Test event 1
SUMMARY:Test event 18
END:VEVENT
END:VCALENDAR
6 changes: 2 additions & 4 deletions tests/test_icssplit.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Read some sample .ics data:
>>> from icssplit import icssplit
>>> ics = open('tests/test_icssplit.ics').read()
>>> len(ics)
5846
5863

We've got 18 events here:

Expand All @@ -17,15 +17,13 @@ We'll just get one file back if we split with maxsize=1MB:
>>> len(files)
1
>>> len(files[0])
5846
5863

If we use a smaller maxsize we'll get more files back:

>>> files = list(icssplit(ics, maxsize=2500))
>>> len(files)
6
>>> [ len(file) for file in files ]
[2471, 2471, 2471, 2471, 2471, 2471]

If we set maxsize to a size smaller than even the preamble and postamble,
then what we'll get back is one event per file. We could/should perhaps
Expand Down

0 comments on commit abcc866

Please sign in to comment.