From 5665d1b749e5acf1c300f07de60cea43f27e4e15 Mon Sep 17 00:00:00 2001 From: Ondrej Platek Date: Sat, 21 Dec 2013 15:52:08 +0100 Subject: [PATCH] Kaldi decoder running on ptics with en LM Run alex/applications/PublicTransportInfoCS/vhub_test_kaldi --- .../PublicTransportInfoCS/kaldi.cfg | 22 ++++++++++++ .../PublicTransportInfoCS/vhub_test_kaldi | 5 +++ alex/components/asr/kaldi.py | 34 ++++++++++++++----- alex/resources/default.cfg | 1 + 4 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 alex/applications/PublicTransportInfoCS/kaldi.cfg create mode 100755 alex/applications/PublicTransportInfoCS/vhub_test_kaldi diff --git a/alex/applications/PublicTransportInfoCS/kaldi.cfg b/alex/applications/PublicTransportInfoCS/kaldi.cfg new file mode 100644 index 00000000..bf2e8a3d --- /dev/null +++ b/alex/applications/PublicTransportInfoCS/kaldi.cfg @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# vim: set fileencoding=UTF-8 filetype=python : +# +# When the configuration file is loaded, several automatic transformations +# are applied: +# +# 1) '{cfg_abs_path}' as a substring of atomic attributes is replaced by +# an absolute path of the configuration files. This can be used to +# make the configuration file independent of the location of programs +# using the configuration file. +# +# or better, use the as_project_path function + +config = { + 'ASR': { + 'debug': True, + 'type': 'Kaldi', + 'Kaldi': { + 'debug': True, + } + }, +} diff --git a/alex/applications/PublicTransportInfoCS/vhub_test_kaldi b/alex/applications/PublicTransportInfoCS/vhub_test_kaldi new file mode 100755 index 00000000..3c434ced --- /dev/null +++ b/alex/applications/PublicTransportInfoCS/vhub_test_kaldi @@ -0,0 +1,5 @@ +#!/bin/bash + +cd .. 
+ +./vhub.py -c ./PublicTransportInfoCS/ptics.cfg ./PublicTransportInfoCS/kaldi.cfg diff --git a/alex/components/asr/kaldi.py b/alex/components/asr/kaldi.py index 5c13f45f..802abe8d 100644 --- a/alex/components/asr/kaldi.py +++ b/alex/components/asr/kaldi.py @@ -11,6 +11,8 @@ # FIXME PYTHONPATH I can change : sys.path insert into(0,) raise KaldiSetupException('%s\nTry setting PYTHONPATH or LD_LIBRARY_PATH' % e.message) import time +from datetime import datetime +import os class KaldiASR(object): @@ -25,6 +27,8 @@ def __init__(self, cfg): self.logger = cfg['Logging']['system_logger'] self.cfg = cfg kcfg = cfg['ASR']['Kaldi'] + + self.debug = kcfg['debug'] self.wst = wst2dict(kcfg['wst']) self.max_dec_frames = kcfg['max_dec_frames'] # specify all other options in config @@ -37,7 +41,6 @@ def __init__(self, cfg): self.decoder = PyGmmLatgenWrapper() self.decoder.setup(argv) - self.decoder.reset(keep_buffer_data=False) # FIXME is it necessary? def flush(self): """ @@ -56,14 +59,16 @@ def rec_in(self, frame): :frame: @todo :returns: self - The instance of KaldiASR """ - start = time.clock() + frame_total, start = 0, time.clock() self.decoder.frame_in(frame.payload) - self.logger.info('frame_in of %d frames' % (len(frame.payload) / 2)) + self.logger.debug('frame_in of %d frames' % (len(frame.payload) / 2)) dec_t = self.decoder.decode(max_frames=self.max_dec_frames) while dec_t > 0: + frame_total += dec_t dec_t = self.decoder.decode(max_frames=self.max_dec_frames) - self.logger.info('Forward decoding of %d frames in %s secs' % ( - dec_t, str(time.clock() - start))) + if (frame_total > 0): + self.logger.debug('Forward decoding of %d frames in %s secs' % ( + frame_total, str(time.clock() - start))) return self def hyp_out(self): @@ -71,16 +76,29 @@ def hyp_out(self): Returns recognizers hypotheses about the input speech audio. 
""" start = time.clock() + + # Get hypothesis self.decoder.prune_final() - lat = self.decoder.get_lattice() + utt_prob, lat = self.decoder.get_lattice() + self.decoder.reset(keep_buffer_data=False) + + # Convert lattice to nblist nbest = lattice_to_nbest(lat, n=5) nblist = UtteranceNBList() for w, word_ids in nbest: - words = [self.wst[str(i)] for i in word_ids] + words = ' '.join([self.wst[str(i)] for i in word_ids]) nblist.add(w, Utterance(words)) - self.logger.info('hyp_out: get_lattice+nbest in %s secs' % str(time.clock() - start)) + + # Log if len(nbest) == 0: self.logger.warning('hyp_out: empty hypothesis') nblist.add(1.0, Utterance('Empty hypothesis: DEBUG')) + if self.debug: + output_file_name = os.path.join( + self.logger.get_session_dir_name(), + '%s.fst' % str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f'))) + lat.write(output_file_name) + self.logger.info('utterance "probability" is %f' % utt_prob) + self.logger.debug('hyp_out: get_lattice+nbest in %s secs' % str(time.clock() - start)) return nblist diff --git a/alex/resources/default.cfg b/alex/resources/default.cfg index 1aa9af76..8cfd3adf 100644 --- a/alex/resources/default.cfg +++ b/alex/resources/default.cfg @@ -216,6 +216,7 @@ config = { }, }, 'Kaldi': { + 'debug': False, 'wst': os.path.abspath('../resources/asr/kaldi/words.txt'), 'config': os.path.abspath('../resources/asr/kaldi/decode.conf'), 'verbose': 0,