wrznr · wrznr · Jun 29, 2018 · Nov 9, 2017 · Nov 15, 2017 · Jun 1, 2018
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,9 @@
 env/
-*.swp
 .cache/
 .pytest_cache/
 *__pycache__*
 *.egg-info/
+# vim swap files
+*.swp
+# pyc files
+*.pyc
diff --git a/gramophone/apps/__init__.py b/gramophone/apps/__init__.py
@@ -1 +1,2 @@
 from .gp_app import create_gp_app
+from .hy_app import create_hy_app
diff --git a/gramophone/apps/gp_app.py b/gramophone/apps/gp_app.py
@@ -1,12 +1,55 @@
 from __future__ import absolute_import
 
 from flask import Flask
+from flask import request
 
-def create_gp_app(mapping):
+def create_gp_app(aligner,transcriber,rater,formatter):
+    """
+    Create the Flask application that serves grapheme-phoneme conversion.
+
+    The arguments are used as follows: aligner.scan() yields segmentations,
+    transcriber.transcribe() yields transcriptions for a segmentation,
+    rater.rate() scores a pair and formatter.encode() renders the response.
+    """
     app = Flask(__name__)
 
-    @app.route('/gp')
+    @app.route('/gp/', methods=['GET', 'POST'])
     def index():
-        return mapping
+        """
+        GET: convert every 'w' argument; the first 'f' argument (if any)
+        selects the output format. POST: echo the submitted form data.
+        """
+        if request.method == 'GET':
+
+            # get args
+            strings = request.args.getlist('w')
+            formats = request.args.getlist('f')
+
+            # only the first requested format is honoured
+            oformat = ""
+            if formats:
+                oformat = formats[0]
+
+            results = []
+            for string in strings:
+                segmentations = aligner.scan(string.lower())
+                best_transcription = []
+                best_prob = 0.0
+                for segmentation in segmentations:
+                    transcriptions = transcriber.transcribe(segmentation)
+                    for transcription in transcriptions:
+                        prob = rater.rate([segmentation,transcription])
+                        if prob >= best_prob:
+                            best_prob = prob
+                            best_transcription = transcription
+                # Report the score of the selected transcription. The loop
+                # variable prob only holds the last candidate's score and is
+                # unset (or stale) when a word yields no candidate at all.
+                results.append((string,u",".join(best_transcription),best_prob))
+
+            return formatter.encode(results,oformat)
+
+        elif request.method == 'POST':
+            return str(request.form)
 
     return app
diff --git a/gramophone/apps/hy_app.py b/gramophone/apps/hy_app.py
@@ -0,0 +1,49 @@
+from __future__ import absolute_import
+
+from flask import Flask
+from flask import request
+
+def create_hy_app(coder,labeller,formatter):
+    """
+    Create the Flask application that serves hyphenation at /hy/.
+
+    Only coder.encode(), coder.decode(), labeller.label() and
+    formatter.encode() are called on the arguments.
+    """
+    app = Flask(__name__)
+
+    @app.route('/hy/', methods=['GET', 'POST'])
+    def index():
+        """
+        GET: hyphenate every 'w' argument; the first 'f' argument (if any)
+        selects the output format. POST: echo the submitted form data.
+        """
+        if request.method == 'GET':
+
+            # get args
+            strings = request.args.getlist('w')
+            formats = request.args.getlist('f')
+
+            # only the first requested format is honoured
+            oformat = ""
+            if formats:
+                oformat = formats[0]
+
+            results = []
+            for string in strings:
+                encodement = coder.encode(string,mode="scan")
+                labellings = labeller.label(encodement)
+                for labelling in labellings:
+                    # start from an empty list for every labelling so that
+                    # rows of an earlier labelling are not decoded again
+                    combination = []
+                    for i in range(len(encodement)):
+                        combination.append(u"%s\t%s" % (encodement[i],labelling[i]))
+                    results.append((string,coder.decode(combination)))
+
+            return formatter.encode(results, oformat)
+
+        elif request.method == 'POST':
+            return str(request.form)
+
+    return app
diff --git a/gramophone/gp/__init__.py b/gramophone/gp/__init__.py
@@ -1,3 +1,4 @@
 from .alignment import Aligner
 from .transcription import Transcriber
 from .rating import Rater
+from .formatting import Formatter
diff --git a/gramophone/gp/alignment.py b/gramophone/gp/alignment.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import sys
 import pywrapfst as fst
 import regex as re
 
@@ -41,9 +40,15 @@ def chain(self,g):
             t.set_output_symbols(self.syms)
             src = t.add_state()
             t.set_start(src)
+            dest = src
             for c in g:
+                # skip unknown symbols
+                try:
+                  s = self.syms.find(c)
+                except:
+                  continue
                 dest = t.add_state()
-                t.add_arc(src,fst.Arc(self.syms.find(c), self.syms.find(c), "0", dest))
+                t.add_arc(src,fst.Arc(s, s, "0", dest))
                 src = dest
             t.set_final(dest)
         return t
@@ -89,7 +94,6 @@ def __align_fst(self,g,p):
         t4.project(project_output=True)
 
         if t4.num_arcs(t4.start()) == 0:
-            sys.stderr.write(u"Empty expansion: %s %s\n" % (g, p))
             return fst.Fst()
 
         t5 = fst.compose(t3,self.E)

diff --git a/gramophone/gp/formatting.py b/gramophone/gp/formatting.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+
+import json
+
+class Formatter:
+
+    def __init__(self):
+        """
+        Create a formatter; no state is set up.
+        """
+        pass
+
+    def encode(self, io_tuples, fmt):
+        """
+        Render io_tuples in the output format named by fmt.
+
+        fmt is compared case-insensitively, ignoring surrounding whitespace.
+        "txt" gives one tab-separated line per item built from its first two
+        fields; "json" gives a JSON list of objects with the keys "word",
+        "phonology" and "probability" (the third field formatted with five
+        decimals); any other value gives str(io_tuples).
+        """
+        kind = fmt.strip().lower()
+
+        if kind == "json":
+            records = [
+                {"word" : item[0], "phonology" : item[1], "probability" : "%.5f" % item[2]}
+                for item in io_tuples
+            ]
+            return json.dumps(records)
+
+        if kind == "txt":
+            lines = ["%s\t%s" % (item[0], item[1]) for item in io_tuples]
+            return "\n".join(lines)
+
+        return str(io_tuples)
diff --git a/gramophone/hy/__init__.py b/gramophone/hy/__init__.py
@@ -1,2 +1,3 @@
 from .coding import Coder
 from .labelling import Labeller
+from .formatting import Formatter
diff --git a/gramophone/hy/formatting.py b/gramophone/hy/formatting.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+import json
+
+class Formatter:
+
+    def __init__(self):
+        """
+        Create a formatter; no state is set up.
+        """
+        pass
+
+    def encode(self, io_tuples, fmt):
+        """
+        Render io_tuples in the output format named by fmt.
+
+        fmt is compared case-insensitively, ignoring surrounding whitespace.
+        "txt" gives one tab-separated line per item built from its first two
+        fields; "json" gives a JSON list of objects with the keys "word" and
+        "hyphenation"; any other value gives str(io_tuples).
+        """
+        kind = fmt.strip().lower()
+
+        if kind == "json":
+            records = [
+                {"word" : item[0], "hyphenation" : item[1]}
+                for item in io_tuples
+            ]
+            return json.dumps(records)
+
+        if kind == "txt":
+            lines = ["%s\t%s" % (item[0], item[1]) for item in io_tuples]
+            return "\n".join(lines)
+
+        return str(io_tuples)
diff --git a/gramophone/scripts/gramophone.py b/gramophone/scripts/gramophone.py
@@ -27,7 +27,7 @@ def HY(name="hy"):
 @click.option('-m', '--model', default='model', help='prefix of the output model files')
 @click.argument('data')
 def train_gp(mapping,model,data):
-    """Train a model"""
+    """Train a model."""
 
     #
     # stage 1: alignment
@@ -129,7 +129,7 @@ def apply_gp(mapping,crf,lm,strings):
 
     # convert
     for string in in_strings:
-        segmentations = aligner.scan(string)
+        segmentations = aligner.scan(string.lower())
         best_transcription = []
         best_prob = 0.0
         for segmentation in segmentations:
@@ -195,7 +195,7 @@ def train_hy(model,data):
     labeller.save(model + ".hy.crf")
 
 @HY.command(name="apply")
-@click.option('-c', '--crf', required=True, help='transcription CRF model')
+@click.option('-c', '--crf', required=True, help='hyphenation CRF model')
 @click.argument('strings', nargs=-1)
 def apply_hy(crf,strings):
     """Convert strings"""

diff --git a/gramophone/scripts/gramophone_server.py b/gramophone/scripts/gramophone_server.py
@@ -3,12 +3,71 @@
 import click
 
 from gramophone import apps
+from gramophone import gp
+from gramophone import hy
 
-@click.command()
+@click.group()
+def cli():
+    pass
+
+@cli.command(name="gp")
 @click.option('-M', '--mapping', required=True, help='grapheme-phoneme mapping')
-def run(mapping):
+@click.option('-c', '--crf', required=True, help='transcription CRF model')
+@click.option('-l', '--language-model', 'lm', required=True, help='rating language model')
+def run_gp(mapping,crf,lm):
     """
-    Run the application
+    Run the g2p server.
     """
-    app = apps.create_gp_app(mapping)
+
+    #
+    # loading: every component is built once, before the app is created
+    #
+    click.echo(u"Loading...", err=True)
+
+    click.echo(u"...data alignment", err=True)
+    aligner = gp.Aligner(mapping=mapping)
+
+    click.echo(u"...transcription CRF model", err=True)
+    transcriber = gp.Transcriber()
+    transcriber.load(crf)
+
+    click.echo(u"...n-gram language model", err=True)
+    rater = gp.Rater.load(lm)
+
+    click.echo(u"...output formatter", err=True)
+    formatter = gp.Formatter()
+
+
+    #
+    # hand the loaded components to the Flask app and start serving
+    #
+    app = apps.create_gp_app(aligner,transcriber,rater,formatter)
+    app.run()
+
+@cli.command(name="hy")
+@click.option('-c', '--crf', required=True, help='hyphenation CRF model')
+def run_hy(crf):
+    """
+    Run the hyphenation server.
+    """
+
+    #
+    # loading: every component is built once, before the app is created
+    #
+    click.echo(u"Loading...", err=True)
+
+    click.echo(u"...coder", err=True)
+    coder = hy.Coder()
+
+    click.echo(u"...hyphenation CRF model", err=True)
+    labeller = hy.Labeller()
+    labeller.load(crf)
+
+    click.echo(u"...output formatter", err=True)
+    formatter = hy.Formatter()
+
+    #
+    # hand the loaded components to the Flask app and start serving
+    #
+    app = apps.create_hy_app(coder, labeller, formatter)
     app.run()
diff --git a/setup.py b/setup.py
@@ -22,7 +22,7 @@
     entry_points={
           'console_scripts': [
               'gramophone=gramophone.scripts.gramophone:cli',
-              'gramophone_server=gramophone.scripts.gramophone_server:run',
+              'gramophone-server=gramophone.scripts.gramophone_server:cli',
           ]
     },
 )
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		from .gp_app import create_gp_app
		from .hy_app import create_hy_app