forked from MarineBioAcousticsRC/Triton
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HViteOutputParse.py
55 lines (36 loc) · 1.34 KB
/
HViteOutputParse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/env python
# Given three files:
# token_timings (input) - containing: start_t stop_t "token"
# master label file (input) - label file generated by HTK with class information
# output file
#
# merge the two input files to create a new timing file with the class label
# standard libraries
import os
import sys
import re
import pdb
# custom libraries
sys.path.append("c:/bin-htk/lib")
sys.path.append(os.path.expanduser("~/bin-htk/lib"))
import labels
def main():
    """Merge a token timing file with HTK class labels into a new timing file.

    Command-line arguments (read from sys.argv):
        1. token timing file (input); lines of the form: start_t stop_t "token"
        2. master label file (input), loaded through labels.Labels.add_mlf
        3. output file; receives one line per matched token, of the form:
           start_t stop_t "class label"

    Lines of the token file that do not match the expected layout are
    skipped without comment.

    Raises:
        ValueError: if the three file arguments are not all supplied.
    """
    if len(sys.argv) != 4:
        # The call form of raise is valid on both Python 2 and Python 3.
        raise ValueError(
            "Need token file, master label file, and output file")

    # The quantifiers live inside the named groups so that each group
    # captures the complete number; a repeated group would only keep the
    # text matched by its final repetition.
    number = r'\d+\.?\d*'
    line_match = re.compile(
        r'(?P<Start>' + number + r') (?P<Stop>' + number + r') '
        r'"(?P<Token>.*)"')

    # Context managers close both files even if a lookup or write fails.
    with open(sys.argv[1]) as token_h:
        MLFs = labels.Labels()
        MLFs.add_mlf(sys.argv[2])

        with open(sys.argv[3], 'w') as output_h:
            for line in token_h:
                match = line_match.match(line)
                if not match:
                    continue

                # Get information for current region
                start, stop, token = match.groups()

                # Find the class of the token: the first entry of the
                # ".rec" record is used, and its first field is the class
                # label.  (The second field, described by the original
                # author as the log likelihood of the class, is not
                # written out.)
                label_info = MLFs.access("%s.rec" % (token,))
                label = label_info[0][0]

                # The regex yields strings, but %f needs real numbers.
                output_h.write(
                    '%f %f "%s"\n' % (float(start), float(stop), label))
# Script entry point.  The call is unconditional (there is no __main__
# guard), so importing this file also runs main() against sys.argv.
main()