Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 17 #21

Merged
merged 8 commits into from
Jun 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
env/
*.swp
.cache/
.pytest_cache/
*__pycache__*
*.egg-info/
# vim swap files
*.swp
# pyc files
*.pyc
1 change: 1 addition & 0 deletions gramophone/apps/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .gp_app import create_gp_app
from .hy_app import create_hy_app
35 changes: 32 additions & 3 deletions gramophone/apps/gp_app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,41 @@
from __future__ import absolute_import

from flask import Flask
from flask import request

def create_gp_app(aligner,transcriber,rater,formatter):
    """
    Build the Flask application that serves grapheme-to-phoneme conversion.

    The app exposes a single route, ``/gp/``:

    * GET reads every ``w`` query argument as an input string and the first
      ``f`` query argument (empty string if absent) as the output format,
      picks the best-rated transcription for each string and returns
      ``formatter.encode(results, oformat)``.
    * POST returns the string representation of the submitted form.

    :param aligner: object whose ``scan(string)`` yields candidate segmentations
    :param transcriber: object whose ``transcribe(segmentation)`` yields
        candidate transcriptions
    :param rater: object whose ``rate([segmentation, transcription])`` returns
        a score; higher scores win
    :param formatter: object whose ``encode(results, fmt)`` builds the
        response body from ``(string, transcription, score)`` triples
    :return: the configured Flask application
    """
    app = Flask(__name__)

    @app.route('/gp/', methods=['GET', 'POST'])
    def index():
        if request.method == 'POST':
            return str(request.form)

        # Everything else that reaches this view is GET or the HEAD request
        # Flask routes alongside GET; both are answered the same way so the
        # view never falls through and returns None.
        strings = request.args.getlist('w')
        formats = request.args.getlist('f')
        oformat = formats[0] if formats else ""

        results = []
        for string in strings:
            best_transcription = []
            best_prob = 0.0
            for segmentation in aligner.scan(string.lower()):
                for transcription in transcriber.transcribe(segmentation):
                    prob = rater.rate([segmentation,transcription])
                    if prob >= best_prob:
                        best_prob = prob
                        best_transcription = transcription
            # Report the score of the winning transcription.  Using the loop
            # variable here would report the last candidate's score and fail
            # outright when no candidate was produced.
            results.append((string,u",".join(best_transcription),best_prob))

        return formatter.encode(results,oformat)

    return app
36 changes: 36 additions & 0 deletions gramophone/apps/hy_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import absolute_import

from flask import Flask
from flask import request

def create_hy_app(coder,labeller,formatter):
    """
    Build the Flask application that serves hyphenation.

    The app exposes a single route, ``/hy/``:

    * GET reads every ``w`` query argument as an input string and the first
      ``f`` query argument (empty string if absent) as the output format,
      hyphenates each string and returns
      ``formatter.encode(results, oformat)``.
    * POST returns the string representation of the submitted form.

    :param coder: object providing ``encode(string, mode="scan")`` and
        ``decode(combination)``
    :param labeller: object whose ``label(encodement)`` yields labellings
        that are indexable in parallel with the encodement
    :param formatter: object whose ``encode(results, fmt)`` builds the
        response body from ``(string, hyphenation)`` pairs
    :return: the configured Flask application
    """
    app = Flask(__name__)

    @app.route('/hy/', methods=['GET', 'POST'])
    def index():
        if request.method == 'POST':
            return str(request.form)

        # Everything else that reaches this view is GET or the HEAD request
        # Flask routes alongside GET; both are answered the same way so the
        # view never falls through and returns None.
        strings = request.args.getlist('w')
        formats = request.args.getlist('f')
        oformat = formats[0] if formats else ""

        results = []
        for string in strings:
            encodement = coder.encode(string,mode="scan")
            labellings = labeller.label(encodement)
            # Pair each encoded item with its label as "item<TAB>label".
            # NOTE(review): rows from every labelling are appended to the
            # same list -- presumably label() yields a single labelling per
            # input; confirm against the Labeller implementation.
            combination = []
            for labelling in labellings:
                for i in range(len(encodement)):
                    combination.append(u"%s\t%s" % (encodement[i],labelling[i]))
            results.append((string,coder.decode(combination)))

        return formatter.encode(results, oformat)

    return app
1 change: 1 addition & 0 deletions gramophone/gp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .alignment import Aligner
from .transcription import Transcriber
from .rating import Rater
from .formatting import Formatter
10 changes: 7 additions & 3 deletions gramophone/gp/alignment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-

import sys
import pywrapfst as fst
import regex as re

Expand Down Expand Up @@ -41,9 +40,15 @@ def chain(self,g):
t.set_output_symbols(self.syms)
src = t.add_state()
t.set_start(src)
dest = src
for c in g:
# skip unknown symbols
try:
s = self.syms.find(c)
except:
continue
dest = t.add_state()
t.add_arc(src,fst.Arc(self.syms.find(c), self.syms.find(c), "0", dest))
t.add_arc(src,fst.Arc(s, s, "0", dest))
src = dest
t.set_final(dest)
return t
Expand Down Expand Up @@ -89,7 +94,6 @@ def __align_fst(self,g,p):
t4.project(project_output=True)

if t4.num_arcs(t4.start()) == 0:
sys.stderr.write(u"Empty expansion: %s %s\n" % (g, p))
return fst.Fst()

t5 = fst.compose(t3,self.E)
Expand Down
26 changes: 26 additions & 0 deletions gramophone/gp/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-

import json

class Formatter:
    """Render grapheme-to-phoneme results as text, JSON or a plain string."""

    def __init__(self):
        """
        Nothing to set up; the formatter keeps no state.
        """

    def encode(self, io_tuples, fmt):
        """
        Serialise conversion results in the requested output format.

        ``fmt`` is stripped and lower-cased before matching:

        * ``"txt"``: one ``word<TAB>phonology`` line per entry
        * ``"json"``: a JSON array of objects with the keys ``word``,
          ``phonology`` and ``probability`` (the latter as a string with
          five decimals)
        * anything else: ``str(io_tuples)``

        Entries are read by index: 0 is the word, 1 the phonology and
        2 the probability (only needed for JSON).
        """
        kind = fmt.strip().lower()

        if kind == "json":
            records = [
                {
                    "word" : entry[0],
                    "phonology" : entry[1],
                    "probability" : "%.5f" % entry[2],
                }
                for entry in io_tuples
            ]
            return json.dumps(records)

        if kind == "txt":
            lines = ["%s\t%s" % (entry[0], entry[1]) for entry in io_tuples]
            return "\n".join(lines)

        return str(io_tuples)
1 change: 1 addition & 0 deletions gramophone/hy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .coding import Coder
from .labelling import Labeller
from .formatting import Formatter
26 changes: 26 additions & 0 deletions gramophone/hy/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-

import json

class Formatter:
    """Render hyphenation results as text, JSON or a plain string."""

    def __init__(self):
        """
        Nothing to set up; the formatter keeps no state.
        """

    def encode(self, io_tuples, fmt):
        """
        Serialise hyphenation results in the requested output format.

        ``fmt`` is stripped and lower-cased before matching:

        * ``"txt"``: one ``word<TAB>hyphenation`` line per entry
        * ``"json"``: a JSON array of objects with the keys ``word`` and
          ``hyphenation``
        * anything else: ``str(io_tuples)``

        Entries are read by index: 0 is the word, 1 the hyphenation.
        """
        kind = fmt.strip().lower()

        if kind == "json":
            records = [
                {"word" : entry[0], "hyphenation" : entry[1]}
                for entry in io_tuples
            ]
            return json.dumps(records)

        if kind == "txt":
            lines = ["%s\t%s" % (entry[0], entry[1]) for entry in io_tuples]
            return "\n".join(lines)

        return str(io_tuples)
6 changes: 3 additions & 3 deletions gramophone/scripts/gramophone.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def HY(name="hy"):
@click.option('-m', '--model', default='model', help='prefix of the output model files')
@click.argument('data')
def train_gp(mapping,model,data):
"""Train a model"""
"""Train a model."""

#
# stage 1: alignment
Expand Down Expand Up @@ -129,7 +129,7 @@ def apply_gp(mapping,crf,lm,strings):

# convert
for string in in_strings:
segmentations = aligner.scan(string)
segmentations = aligner.scan(string.lower())
best_transcription = []
best_prob = 0.0
for segmentation in segmentations:
Expand Down Expand Up @@ -195,7 +195,7 @@ def train_hy(model,data):
labeller.save(model + ".hy.crf")

@HY.command(name="apply")
@click.option('-c', '--crf', required=True, help='transcription CRF model')
@click.option('-c', '--crf', required=True, help='hyphenation CRF model')
@click.argument('strings', nargs=-1)
def apply_hy(crf,strings):
"""Convert strings"""
Expand Down
67 changes: 63 additions & 4 deletions gramophone/scripts/gramophone_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,71 @@
import click

from gramophone import apps
from gramophone import gp
from gramophone import hy

# Root command group of the server script; the "gp" and "hy" sub-commands
# below are registered on it via @cli.command.
@click.group()
def cli():
    pass

@cli.command(name="gp")
@click.option('-M', '--mapping', required=True, help='grapheme-phoneme mapping')
@click.option('-c', '--crf', required=True, help='transcription CRF model')
@click.option('-l', '--language-model', 'lm', required=True, help='rating language model')
def run_gp(mapping,crf,lm):
    """
    Run the g2p server.
    """
    # NOTE: the docstring above is what click shows as this command's help.

    #
    # loading (progress is reported on stderr)
    #
    click.echo(u"Loading...", err=True)

    click.echo(u"...data alignment", err=True)
    aligner = gp.Aligner(mapping=mapping)

    click.echo(u"...transcription CRF model", err=True)
    transcriber = gp.Transcriber()
    transcriber.load(crf)

    click.echo(u"...n-gram language model", err=True)
    rater = gp.Rater.load(lm)

    click.echo(u"...output formatter", err=True)
    formatter = gp.Formatter()

    #
    # load app and run
    #
    app = apps.create_gp_app(aligner,transcriber,rater,formatter)
    app.run()

@cli.command(name="hy")
@click.option('-c', '--crf', required=True, help='hyphenation CRF model')
def run_hy(crf):
    """
    Run the hyphenation server.
    """
    # Load each component in turn, announcing every step on stderr.
    click.echo(u"Loading...", err=True)

    click.echo(u"...coder", err=True)
    codec = hy.Coder()

    click.echo(u"...hyphenation CRF model", err=True)
    crf_labeller = hy.Labeller()
    crf_labeller.load(crf)

    click.echo(u"...output formatter", err=True)
    out_formatter = hy.Formatter()

    # Wire the components into the Flask app and start serving.
    server = apps.create_hy_app(codec, crf_labeller, out_formatter)
    server.run()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
entry_points={
'console_scripts': [
'gramophone=gramophone.scripts.gramophone:cli',
'gramophone_server=gramophone.scripts.gramophone_server:run',
'gramophone-server=gramophone.scripts.gramophone_server:cli',
]
},
)