-
Notifications
You must be signed in to change notification settings - Fork 4
/
basic_queries.py
59 lines (47 loc) · 2.12 KB
/
basic_queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import sys
import os
import argparse
base_dir = os.path.dirname(os.path.abspath(__file__))
script_dir = os.path.join(base_dir, 'Common')
sys.path.insert(0, script_dir)
import common
import re
import time
from polyglotdb.utils import ensure_local_database_running
from polyglotdb import CorpusConfig, CorpusContext
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('corpus_name', help='Name of the corpus')
parser.add_argument('-r', '--reset', help="Reset the corpus", action='store_true')
parser.add_argument('-d', '--docker', help="This script is being called from Docker", action='store_true')
args = parser.parse_args()
corpus_name = args.corpus_name
reset = args.reset
docker = args.docker
directories = [x for x in os.listdir(base_dir) if os.path.isdir(x) and x != 'Common']
if args.corpus_name not in directories:
print(
'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.'.format(
args.corpus_name, ', '.join(directories)))
sys.exit(1)
corpus_conf = common.load_config(corpus_name)
print('Processing...')
if reset:
common.reset(corpus_name)
ip = common.server_ip
if docker:
ip = common.docker_ip
with ensure_local_database_running(corpus_name, port=common.server_port, ip=ip, token=common.load_token()) as params:
print(params)
config = CorpusConfig(corpus_name, **params)
config.formant_source = 'praat'
config.debug = True
with CorpusContext(config) as c:
print(c.hierarchy)
# Common set up
common.loading(config, corpus_conf['corpus_directory'], corpus_conf['input_format'])
common.lexicon_enrichment(config, corpus_conf['unisyn_spade_directory'], corpus_conf['dialect_code'])
common.speaker_enrichment(config, corpus_conf['speaker_enrichment_file'])
common.basic_enrichment(config, corpus_conf['vowel_inventory'] + corpus_conf['extra_syllabic_segments'], corpus_conf['pauses'])
common.basic_queries(config)
print('Finishing up!')