-
Notifications
You must be signed in to change notification settings - Fork 0
custom_module.py
Alexandra Duboy edited this page Jan 17, 2020
·
1 revision
from typing import List
from outline.patternset import PatternSet
from outline.textutil import replace
def get_patterns() -> List[PatternSet]:
    """Build the list of ``PatternSet`` objects defined by this module.

    Every ``PatternSet`` receives a list of one or more regular expressions
    and the keyword list from ``get_keywords()``; the same keyword list
    object is shared by all of them.

    NOTE(review): how ``PatternSet`` uses the capture groups and whether the
    list order encodes outline nesting depth is decided in
    ``outline.patternset`` -- presumably outermost level first; confirm there.
    """
    shared_keywords = get_keywords()

    # One tuple of regexes per PatternSet, in the order they are returned.
    regex_groups = (
        # "(<n> ILCS <n>/<n>[.][<n>]) (from Ch. <n>, par. <n>)"
        (r'(\(\d+ ILCS \d+\/\d+\.{0,1}\d*\) \(from Ch. \d+, par\. (\d+)\))',),
        # "Sec. <n>.[<n>][.]" followed by a space and a word
        (r'(Sec\. (\d+\.\d*)\.{0,1}) \w+',),
        # "<n>.<n>[LETTERS]-<n>" or plain "<n>.<n>", each followed by
        # whitespace and a run of uppercase letters and/or "["
        (
            r'((\d+\.\d+[A-Z]*\-\d+))\s+[\[A-Z]+',
            r'((\d+\.\d+))\s+[\[A-Z]+',
        ),
        # Parenthesised run of the letters i, x, v -- e.g. "(iv) "
        (r'(\(([ixv]+)\))\s+',),
        # Parenthesised lowercase letters -- e.g. "(a) "
        (r'(\(([a-z]+)\))\s+',),
        # One or two lowercase letters plus a period, whitespace on both sides
        (r'\s+(([a-z]{1,2})\.)\s+',),
        # Parenthesised uppercase letters -- e.g. "(A) "
        (r'(\(([A-Z]+)\))\s+',),
        # Parenthesised digits -- e.g. "(1) "
        (r'(\((\d+)\))\s+',),
    )

    return [PatternSet(list(group), shared_keywords) for group in regex_groups]
def get_keywords() -> List[str]:
    """Return the keyword strings that ``get_patterns()`` hands to every PatternSet.

    A fresh list is created on each call, so callers may mutate the result
    without affecting later calls.
    """
    keywords = [
        'subparagraph',
        'paragraph',
        'section',
        'subsection',
        'item',
        'chapter',
    ]
    return keywords
def custom_clean(text) -> str:
    """Apply this module's text clean-up and return the result.

    Delegates to ``outline.textutil.replace`` with two regex patterns and a
    newline as the final argument.

    NOTE(review): presumably ``replace`` substitutes everything from a
    "History:" line through the next digits-then-newline with a single
    newline -- confirm against ``outline.textutil.replace``.
    """
    history_marker = r'\n\s*History\:\s+'
    digits_then_newline = r'\d+\n'
    return replace(text, history_marker, digits_then_newline, '\n')