Skip to content

custom_module.py

Alexandra Duboy edited this page Jan 17, 2020 · 1 revision
from typing import List

from outline.patternset import PatternSet
from outline.textutil import replace


def get_patterns() -> List[PatternSet]:
    """Build the list of ``PatternSet`` objects used by this module.

    Every ``PatternSet`` is constructed from a list of regex strings and the
    same keyword list returned by a single ``get_keywords()`` call.

    NOTE(review): the order presumably runs from the outermost heading level
    to the innermost one -- confirm against how ``PatternSet`` lists are
    consumed.

    Returns:
        The pattern sets, in the order they are declared below.
    """
    shared_keywords = get_keywords()

    # One inner list per PatternSet; declaration order is preserved.
    regex_groups = [
        # Citation such as "(5 ILCS 100/1) (from Ch. 127, par. 1001)".
        [r'(\(\d+ ILCS \d+\/\d+\.{0,1}\d*\) \(from Ch. \d+, par\. (\d+)\))'],
        # Section heading such as "Sec. 1.5. Word".
        [r'(Sec\. (\d+\.\d*)\.{0,1}) \w+'],
        # Dotted number, optionally with a letter/dash suffix ("5.12A-3"),
        # followed by whitespace and capitals or "[".
        [
            r'((\d+\.\d+[A-Z]*\-\d+))\s+[\[A-Z]+',
            r'((\d+\.\d+))\s+[\[A-Z]+',
        ],
        # Parenthesised run of the letters i, x, v, e.g. "(iv) ".
        [r'(\(([ixv]+)\))\s+'],
        # Parenthesised lowercase letters, e.g. "(a) ".
        [r'(\(([a-z]+)\))\s+'],
        # One or two lowercase letters and a period, surrounded by whitespace.
        [r'\s+(([a-z]{1,2})\.)\s+'],
        # Parenthesised uppercase letters, e.g. "(A) ".
        [r'(\(([A-Z]+)\))\s+'],
        # Parenthesised digits, e.g. "(1) ".
        [r'(\((\d+)\))\s+'],
    ]

    return [PatternSet(regexes, shared_keywords) for regexes in regex_groups]


def get_keywords() -> List[str]:
    """Return a new list of the keyword strings passed to each pattern set."""
    keywords = [
        'subparagraph',
        'paragraph',
        'section',
        'subsection',
        'item',
        'chapter',
    ]
    return keywords


def custom_clean(text) -> str:
    """Run the module-specific cleanup on ``text`` and return the result.

    Delegates to ``outline.textutil.replace`` with a "History:" start pattern,
    a digits-then-newline end pattern, and a single newline as the last
    argument.

    NOTE(review): presumably ``replace`` substitutes everything from a match
    of the first pattern through a match of the second with that newline --
    confirm against ``outline.textutil``.
    """
    history_start = r'\n\s*History\:\s+'
    history_end = r'\d+\n'
    return replace(text, history_start, history_end, '\n')
Clone this wiki locally