Speech-Recognizer-SPX
view release on metacpan or search on metacpan
SPX/Server.pm view on Meta::CPAN
# -*- cperl -*-
use strict;
# Speech::Recognizer::SPX::Server: Perl module for writing PocketSphinx
# streaming audio servers.
# Copyright (c) 2000 Cepstral LLC.
#
# This module is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# Written by David Huggins-Daines <dhuggins@cs.cmu.edu>
package Speech::Recognizer::SPX::Server;
use Speech::Recognizer::SPX qw(:fbs :uttproc $SPHINXDIR);
use Audio::SPX;
use Time::HiRes qw(usleep);
use Fcntl;
use Errno;
use vars qw($VERSION);
$VERSION=0.03;
my %defaults = ( -samprate => 16000,
-adcin => 'TRUE',
-lm => "$SPHINXDIR/model/lm/turtle/turtle.lm",
-dict => "$SPHINXDIR/model/lm/turtle/turtle.dic",
-hmm => "$SPHINXDIR/model/hmm/wsj1" );
sub init {
my ($this, $args, $sock, $log, $verbose) = @_;
my $class = ref $this || $this;
unless (defined $log) {
local *LOG;
open LOG, ">&STDERR" or die "can't dup: $!";
$log = *LOG;
# Readjust die() so we actually see messages ;(
$SIG{__DIE__} = sub { print $log @_; exit $! || ($? >> 8) || 255 };
}
print $log "initializing pocketsphinx\n" if $log;
my @argv = (%defaults, %$args, -verbose => $verbose);
{
my $i = 0;
foreach (@argv) {
unless ($i++ % 2) {
# Make argument names look like arguments to sphinx
$_ = "-$_" unless $_ =~ /^-/;
}
}
}
my %argv = @argv;
@argv = %argv;
fbs_init(\@argv)
or return undef;
my $sps = $args->{-samprate} || $defaults{-samprate};
my $self = { sock => $sock, log => $log,
sps => $sps, sockflags => 0,
timeout => 1000,
verbose => $verbose };
bless $self, $class;
}
SPX/Server.pm view on Meta::CPAN
print $log "waiting for audio\n" if $log;
my $s;
while (defined($s = $cad->read($adbuf, 2048)) && $s == 0) {
usleep 50_000;
}
goto failure unless defined $s;
my $ts = $cad->read_ts;
print $log "listening at $ts\n" if $log;
$cb_listen->($ts) if defined $cb_listen;
print $audio_fh $adbuf if defined $audio_fh;
uttproc_begin_utt() or goto failure;
uttproc_rawdata($adbuf, 0) or goto failure;
while (1) {
$adbuf = "";
$s = $cad->read($adbuf, 2048) or goto failure;
if ($s == 0) {
last if $cad->read_ts - $ts > int($sps * $self->{timeout} / 1000);
usleep(20_000);
} else {
$ts = $cad->read_ts;
print $audio_fh $adbuf if defined $audio_fh;
my $rem = uttproc_rawdata($adbuf, 0)
or goto failure;
}
}
$cad->reset;
fcntl $sock, F_SETFL, $self->{sockflags};
print $log "done listening at $ts\n" if $log;
$cb_not_listen->($ts) if defined $cb_not_listen;
uttproc_end_utt() or goto failure;
goto failure
unless (my ($fr, $hyp) = uttproc_result(1));
print $log "text is $hyp\n" if $log;
return $hyp;
failure:
# Need to restore this for the caller
fcntl $sock, F_SETFL, $self->{sockflags};
return undef;
}
sub fini {
fbs_end();
}
1;
__END__
=head1 NAME
Speech::Recognizer::SPX::Server - Perl module for writing streaming audio speech recognition servers using PocketSphinx
=head1 SYNOPSIS
my $sock = new IO::Socket(... blah blah blah ...);
my $log = new IO::File('server.log');
my $audio_fh = new IO::File('speech.raw');
my $srvr
= Speech::Recognizer::SPX::Server->init({ -arg => val, ... }, $sock, $log, $verbose)
or die "couldn't initialize pocketsphinx: $!";
my $client = new IO::Socket;
while (accept $sock, $client) {
next unless fork;
$srvr->sock($client);
$srvr->calibrate or die "couldn't calibrate audio stream: $!";
while (!$done && defined(my $txt
= $srvr->next_utterance(sub { print $log "listening\n" },
sub { print $log "not listening\n },
$audio_fh))) {
print "recognized text is $txt\n";
...
}
$srvr->fini or die "couldn't shut down server: $!";
exit 0;
}
=head1 DESCRIPTION
This module encapsulates a bunch of the stuff needed to write a
PocketSphinx server which takes streaming audio as input on an arbitrary
filehandle. It's not meant to be flexible or transparent - if you
want that, then read the code and write your own server program using
just the Speech::Recognizer::SPX module.
The interface is vaguely object-oriented, but unfortunately it is
presently not possible to create multiple instances of
Speech::Recognizer::SPX::Server within the same process, due to severe
limitations of the underlying PocketSphinx library. You can, however,
create multiple distinct servers with judicious use of C<fork>, as
shown in the example above.
It is possible that this will be fixed in a future release of PocketSphinx.
=head1 METHODS
=over 4
=item C<init>
my $srvr = Speech::Recognizer::SPX::Server->init(\%args, $sock, $log, $verbose);
C<%args> is a reference to a hash of argument => value pairs, exactly
like the arguments you would pass on the command line to one of the
sphinx example programs. Argument names can be given either with or
without a leading dash.
C<$sock> is a socket or other filehandle (could be anything, really)
on which the server will read audio data. This argument is optional
and not needed to initialize the server - you can set it later with
the C<sock> accessor.
C<$log> is a filehandle on which the server module will log messages.
This argument is optional. Without a filehandle to log on, these
messages (boring things like "started listening at $foo") will not be
printed.
C<$verbose> determines the verbosity level of the Sphinx library.
Currently, due to limitations in the PocketSphinx library, there are only
two options for this value, namely a true value for 'be insanely
verbose', or a false value for 'say nothing at all'.
=item C<calibrate>
$srvr->calibrate;
Calibrates the noise threshold for the continuous audio stream
(i.e. figures out when it should listen and when it shouldn't). This
requires you to actually have a ready and willing source of input on
the socket you set in C<init> or with C<sock>.
=item C<next_utterance>
my $text = $srvr->next_utterance($cb_listen, $cb_not_listen, $audio_fh);
Waits for and recognizes the next utterance in the data stream. All
arguments are optional:
C<$cb_listen> is a reference to (or name of, but I encourage you not
to do that) a subroutine to be called when the recognizer has detected
speech input.
( run in 0.444 second using v1.01-cache-2.11-cpan-98e64b0badf )