使用htk搭建语音拨号系统(linux系统,HTK-3.5.beta-2.tar.gz)
cd HTKLib
make -f MakefileCPU all
cp -rp ./HTKLiblv.a ../HTKTools/
cd ../HTKTools/
make -f MakefileCPU all
(参考htkbook-3.5.alpha.pdf)
Step 1 - the Task Grammar (get wdnet)
gram.txt:
$digit=ONE | TWO | THREE | FOUR | FIVE |
SIX | SEVEN | EIGHT | NINE | OH | ZERO ;
$name = [ JOOP ] JANSEN |
[ JULIAN ] ODELL |
[ DAVE ] OLLASON |
[ PHIL ] WOODLAND |
[ STEVE ] YOUNG ;
(SENT-START ( DIAL <$digit> | (PHONE|CALL) $name ) SENT-END )
./HParse gram.txt wdnet.txt
Step 2 - the Dictionary (get dict.txt)
beep发音字典文件,需要到beep网站自己下载
global.ded:
AS sp
RS cmu
MP sil sil sp
./HDMan -m -w swlist.txt -n monophones1 -l dlog dict.txt beep names.txt
Step 3 - Recording the Data
随机生成录音文本提示testprompts.txt
./HSGen -l -n 200 wdnet.txt dict.txt >testprompts.txt
Step 4 - Creating the Transcription Files
perl prompts2mlf mlf.txt testprompts.txt
cp -rp ./mlf.txt ./words.mlf
./HLEd -l '*' -d dict.txt -i phones0.mlf mkphones0.led words.mlf
Step 5 - Coding the Data
config1文件内容
# Coding parameters
SOURCEFORMAT = WAV
#SOURCEFORMAT = HTK
TARGETKIND = MFCC_0
TARGETRATE = 100000.0
#SAVECOMPRESSED = T
#SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 26
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
./HCopy -T 1 -C config1 -S codetr1.scp
codetr1.scp文件内容如下:
./data/train/speech/001.wav ./data/train/feature/001.mfc
./data/train/speech/002.wav ./data/train/feature/002.mfc
./data/train/speech/003.wav ./data/train/feature/003.mfc
./data/train/speech/004.wav ./data/train/feature/004.mfc
Step 6 - Creating Flat Start Monophones
config2文件内容
# Coding parameters
#SOURCEFORMAT = WAV
SOURCEFORMAT = HTK
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
#SAVECOMPRESSED = T
#SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 26
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
proto文件内容
~o <VECSIZE> 39 <MFCC_0_D_A>
~h "proto"
<BeginHMM>
<NumStates> 5
<State> 2
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 3
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 4
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<TransP> 5
0.0 1.0 0.0 0.0 0.0
0.0 0.6 0.4 0.0 0.0
0.0 0.0 0.7 0.3 0.0
0.0 0.0 0.0 0.7 0.3
0.0 0.0 0.0 0.0 0.0
<EndHMM>
./HCompV -C config2 -f 0.01 -m -S trainmfcc.scp -M hmm0 proto
制作macros文件和hmmdef文件(根据proto文件和vFloors文件)
根据vFloors制作macros文件
macros文件内容如下
~o
<VECSIZE> 39<MFCC_0_D_A>
~v varFloor1
<Variance> 39
4.492153e-001 2.800227e-001 4.004902e-001 7.262168e-001 3.713427e-001 5.923348e-001 3.089855e-001 3.635918e-001 4.011551e-001 3.448929e-001 3.661570e-001 3.404307e-001 7.104830e-001 1.414941e-002 1.002086e-002 1.289929e-002 1.967196e-002 1.588490e-002 1.981885e-002 1.694523e-002 2.165956e-002 1.937735e-002 1.799082e-002 1.821838e-002 1.620020e-002 1.004474e-002 1.865744e-003 1.427446e-003 1.801455e-003 2.748002e-003 2.518953e-003 3.164474e-003 3.122217e-003 3.736564e-003 3.291466e-003 3.174342e-003 3.133416e-003 2.797257e-003 1.262979e-003
生成hmmdefs文件:将 monophones1 中的全部31个音素,依次替换proto文件中的 ~h "proto"(proto --> k ...),得到hmmdefs文件;可以手工生成,也可以编写python脚本生成
1 k
2 ao
3 l
4 d
5 ey
6 v
7 ay
8 ax
9 t
10 f
11 r
12 jh
13 ae
14 n
15 s
16 eh
17 uw
18 p
19 ia
20 oh
21 ow
22 ah
23 w
24 ih
25 sil
26 iy
27 th
28 uh
29 y
30 ng
31 z
#转换前
#~h "proto"
#<BEGINHMM>
#<NUMSTATES> 5
#转换后
#~h "th"
#<BEGINHMM>
#<NUMSTATES> 5
hmmdefs 文件内容如下:
~h "k"
<BeginHMM>
<NumStates> 5
<State> 2
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 3
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 4
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<TransP> 5
0.0 1.0 0.0 0.0 0.0
0.0 0.6 0.4 0.0 0.0
0.0 0.0 0.7 0.3 0.0
0.0 0.0 0.0 0.7 0.3
0.0 0.0 0.0 0.0 0.0
<EndHMM>
~h "ao"
<BeginHMM>
...
<EndHMM>
将phones0.mlf的格式变为phones01.mlf的格式
主要是将 1..lab 变为001..lab 与feature mfc文件的名字一致
phones0.mlf:
#!MLF!#
"*/1..lab"
sil
sil
k
ao
l
jh
ae
n
s
eh
n
sil
sil
.
"*/2..lab"
sil
sil
d
ay
ax
l
th
r
iy
sil
sil
.
"*/3..lab"
phones01.mlf:
#!MLF!#
"*/001..lab"
sil
sil
k
ao
l
jh
ae
n
s
eh
n
sil
sil
.
"*/002..lab"
sil
sil
d
ay
ax
l
th
r
iy
sil
sil
.
"*/003..lab"
迭代训练,得到hmm3/hmmdefs(至少迭代2次,这里迭代3次)
mkdir hmm1
./HERest -C config2 -I phones01.mlf -t 250.0 150.0 1000.0 -S trainmfcc.scp -H hmm0/macros -H hmm0/hmmdefs -M hmm1 monophones1
mkdir hmm2
./HERest -C config2 -I phones01.mlf -t 250.0 150.0 1000.0 -S trainmfcc.scp -H hmm1/macros -H hmm1/hmmdefs -M hmm2 monophones1
mkdir hmm3
./HERest -C config2 -I phones01.mlf -t 250.0 150.0 1000.0 -S trainmfcc.scp -H hmm2/macros -H hmm2/hmmdefs -M hmm3 monophones1
Step 7 - Fixing the Silence Models
7.1 Use a text editor on the file hmm3/hmmdefs to copy the centre state of the sil model to make
a new sp model and store the resulting MMF hmmdefs, which includes the new sp model, in
the new directory hmm4.
mkdir hmm4
hmmdefs (add a new model sp)(copy the centre state of the sil model to make a new sp model)
./makesp sil.txt >sp.txt
copy ~h "sp" to hmm4/hmmdefs
~h "sp"
<BEGINHMM>
<NUMSTATES> 3
<STATE> 2
<MEAN> 39
-1.182995e+01 -4.697730e+00 6.436271e-01 1.527302e+00 -3.230272e+00 -3.413457e+00 -3.028001e+00 -8.141957e+00 -2.843496e-02 -3.121023e+00 2.026845e+00 -1.828322e+00 5.827830e+01 3.562942e-01 3.167303e-01 5.430892e-02 1.031229e-01 -5.339017e-02 -6.599801e-02 -1.794442e-01 1.987328e-01 1.873803e-02 1.121706e-01 1.848500e-01 -1.123776e-01 -3.444094e-01 -4.896715e-02 -8.148830e-03 -9.422581e-02 -4.685211e-02 5.136055e-02 5.905451e-02 -1.727715e-02 -3.533714e-02 -1.032446e-02 9.806012e-02 -3.933425e-02 -1.641800e-02 -1.147459e-01
<VARIANCE> 39
2.220251e+01 3.221262e+01 2.709606e+01 4.915327e+01 5.197477e+01 3.774834e+01 4.730363e+01 6.736674e+01 4.311512e+01 3.672880e+01 3.356824e+01 3.153955e+01 2.850236e+01 3.371483e+00 2.459353e+00 1.716157e+00 2.884690e+00 3.064879e+00 2.742849e+00 3.189539e+00 7.349804e+00 3.466556e+00 2.946460e+00 2.790798e+00 2.538562e+00 3.291097e+00 6.037328e-01 5.250056e-01 3.036307e-01 6.175983e-01 6.131968e-01 5.488435e-01 6.583940e-01 1.444453e+00 6.972485e-01 5.779681e-01 5.735948e-01 4.978335e-01 7.772070e-01
<GCONST> 1.268697e+02
<TRANSP> 3
0.000000e+00 5.000000e-01 5.000000e-01
0.000000e+00 5.000000e-01 5.000000e-01
0.000000e+00 0.000000e+00 0.000000e+00
<ENDHMM>
mkdir hmm5
add "sp" to monophones1 and save the result as monophones2 (后面的命令使用的是 monophones2)
注意:hmmdefs 文件中的每个数字必须完整地写在同一行内,不要被换行截断(数字不要跨行)
./HHEd -H hmm4/macros -H hmm4/hmmdefs -M hmm5 sil.hed monophones2
mkdir hmm6
mkdir hmm7
./HERest -C config2 -I phones01.mlf -t 250.0 150.0 1000.0 -S trainmfcc.scp -H hmm5/macros -H hmm5/hmmdefs -M hmm6 monophones2
./HERest -C config2 -I phones01.mlf -t 250.0 150.0 1000.0 -S trainmfcc.scp -H hmm6/macros -H hmm6/hmmdefs -M hmm7 monophones2
Step 8 - Realigning the Training Data
#./HVite -l '*' -o SWT -b silence -C config2 -a -H hmm7/macros -H hmm7/hmmdefs -i aligned.mlf -m -t 250.0 -y lab -I words01.mlf -S trainmfcc.scp dict.txt monophones2 (-b silence 导致生成aligned.mlf失败)
需要将words01.mlf 内容改变
#!MLF!#
"s001.lab"
变成(行首增加 “*/”)
#!MLF!#
"*/s001.lab"
cp words.mlf words01.mlf
./HVite -l '*' -o SWT -C config2 -a -H hmm7/macros -H hmm7/hmmdefs -i aligned.mlf -m -t 250.0 -y lab -I words01.mlf -S trainmfcc.scp dict.txt monophones2
Step 8.1 - 先来看一下成果,使用训练集做识别正确率测试 (recout.mlf是识别结果)
trainmfcc.scp文件内容如下:
./data/train/feature/s001.mfc
./data/train/feature/s002.mfc
./data/train/feature/s003.mfc
./data/train/feature/s004.mfc
./data/train/feature/s005.mfc
./HVite -H hmm7/macros -H hmm7/hmmdefs -S trainmfcc.scp -l '*' -i recout.mlf -w wdnet.txt -p 0.0 -s 5.0 dict.txt monophones2
./HResults -I words.mlf monophones2 recout.mlf
====================== HTK Results Analysis =======================
Date: Wed Dec 13 14:26:25
Ref : words.mlf
Rec : recout.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=0.00 [H=0, S=100, N=100]
WORD: %Corr=70.08, Acc=68.64 [H=534, D=202, S=26, I=11, N=762]
===================================================================
通过分析参考答案文件words.mlf或recout.mlf,发现在每一句上都加上了SENT-START和SENT-END,这正是识别结果与标注真值文本(参考答案words.mlf)无法完全吻合的罪魁祸首。一个解决办法是在运行HResults时加入 -e 选项来忽略SENT-START和SENT-END,如下所示:
(下面命令中的 ??? 不是乱码,而是 -e 选项的参数:-e ??? X 表示把标签 X 归入空类(null class),即在统计时忽略标签 X)
./HResults -e ??? SENT-START -e ??? SENT-END -I words.mlf monophones2 recout.mlf
====================== HTK Results Analysis =======================
Date: Wed Dec 13 14:32:13
Ref : words.mlf
Rec : recout.mlf
------------------------ Overall Results --------------------------
SENT: %Correct=85.00 [H=85, S=15, N=100]
WORD: %Corr=95.02, Acc=92.88 [H=534, D=3, S=25, I=12, N=562]
===================================================================