-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_def_and_audio.py
executable file
·134 lines (114 loc) · 4.37 KB
/
get_def_and_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: mbd
"""
# Make requests
import requests
# XML parser
import xml.etree.ElementTree as ET
# OS
import os, errno
# Regex library
import re
# Text-to-speech library
from gtts import gTTS as g
def removeStar(text):
exp = "(\*)"
t = re.sub(exp, '', text)
return t
def removeOth(text):
exp = "(\s:)"
t = re.sub(exp, ', ', str(text))
exp = ":"
t = re.sub(exp, '', t)
return t
# Your Meriam Webster Learners Dictionary api key
key = ""
# Define an input file name to read words from it
# Define an output file to write definition data
input_file = "words.txt"
output_file = "definitions.txt"
selections = []
# You may produce audio files and get word definitions at the same time.
# Or you may choose to do one of these operations seperately.
produce_audio = False
get_definitions = True
let_user_select_definition = False
if produce_audio:
# Create a directory to store audio
# files if it doesn't exist.
try:
os.makedirs('audio')
except OSError as e:
if e.errno != errno.EEXIST:
raise
with open(input_file,"r") as f:
words = f.read().splitlines()
for word in words:
if produce_audio:
try:
# Define relative path
r_path = './audio/' + word + '.mp3'
# Check if audio already exists
if not os.path.isfile(r_path):
# Create a text-to-speech object
tts = g(text=word, lang='en')
# Save the audio to the given path
tts.save(r_path)
print('[INFO] Audio is saved for the word {} at {}'.format(word, r_path))
else:
print('[INFO] Audio already exists for the word {} at {}'.format(word, r_path))
except Exception as e:
print('[ERROR] Getting audio failed for the word {}: {}'.format(word, e))
if get_definitions:
try:
# Create the full URI
uri = "http://www.dictionaryapi.com/api/v1/references/learners/xml/" + str(word) + "?key=" + key
# Get data
r = requests.get(uri)
if r.status_code == 200:
# Pre-process
text = r.text.encode('ascii','ignore')
# Definitions are stored in this list
definitions = []
# Parse xml
tree = ET.fromstring(text)
# Definition counter for each word
count = 0
print("Word : {}".format(word))
# Extract data from XML
for i, item in enumerate(tree.iter('entry')):
w = removeStar(item.find('hw').text)
if w == str(word):
# Part of speech
pos = item.find('fl').text
let_user_select_definition and print(pos)
# Definitions
for j, dfn in enumerate(item.iter('dt')):
t = removeOth(dfn.text)
if t != '':
let_user_select_definition and print("{}. {}".format(count,t))
count += 1
definitions.append((pos,t))
selection = 0
# If the word has definitions more than one
# user should make a selection.
if let_user_select_definition and len(definitions) > 1:
selection = int(input("Select the definition:\n> "))
pos = definitions[selection][0]
dfn = definitions[selection][1]
selections.append((word,dfn,pos))
else:
print("[ERROR] For word '{}' got status code {}".format(word,r.status_code))
selections.append((word,'NO-DEFINITION',''))
except Exception as e:
print('[ERROR] Desc failed for the word {}: {}'.format(word, e))
selections.append((word,'NO-DEFINITION',''))
print('---------------------------------')
if get_definitions:
# Write definition and POS values of each word to a file
with open(output_file,"w") as f:
for word, dfn, pos in selections:
f.write(word + "\t" + dfn + "\t" + pos + "\n")
print("[INFO] Word definitions are written to the file {} successfully".format(output_file))