Edinburgh Speech Tools  2.4-release
relation_io.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor updated by awb */
34 /* Date : Feb 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class file i/o, label files */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <fstream>
42 #include "EST_unix.h"
43 #include "EST_types.h"
44 #include "ling_class/EST_Relation.h"
45 #include "EST_string_aux.h"
46 #include "EST_cutils.h"
47 #include "EST_TList.h"
48 #include "EST_Option.h"
49 #include "relation_io.h"
50 
51 #define DEF_SAMPLE_RATE 16000
52 #define HTK_UNITS_PER_SECOND 10000000
53 
54 static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");
55 
56 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
57  int sample);
58 
59 EST_read_status load_esps_label(EST_TokenStream &ts,EST_Relation &rel)
60 {
61  ts.set_SingleCharSymbols(";");
62  ts.set_quotes('"','\\');
63  EST_String key, val;
64 
65  // Skip the header
66  while (!ts.eof())
67  {
68  key = ts.get().string();
69  if (key == "#")
70  break;
71 
72  val = ts.get_upto_eoln().string();
73  // delete leading whitespace
74  if (val.matches(RXleadingwhitespace))
75  val = val.after(RXwhite);
76  rel.f.set(key, val);
77  }
78 
79  if (ts.peek() == "") return format_ok;
80 
81  while (!ts.eof())
82  {
83  EST_Item *si = rel.append();
84  EST_String name;
85 
86  si->set("end",(float)atof(ts.get().string()));
87  ts.get(); // skip the color;
88 
89  for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
90  {
91  EST_Token &t = ts.get();
92  if (name.length() > 0) // preserve internal whitespace
93  name += t.whitespace();
94  name += t.string();
95  }
96  si->set_name(name);
97 
98  if (ts.peek().string() == ";") // absorb separator
99  {
100  ts.get();
101  si->features().load(ts);
102  }
103  }
104  return format_ok;
105 }
106 
107 EST_write_status save_esps_label(const EST_String &filename,
108  const EST_Relation &s,
109  bool evaluate_ff)
110 {
111  ostream *outf;
112  if (filename == "-")
113  outf = &cout;
114  else
115  outf = new ofstream(filename);
116 
117  if (!(*outf))
118  {
119  cerr << "save_esps_label: can't open label output file \"" <<
120  filename << "\"" << endl;
121  return write_fail;
122  }
123 
124  EST_write_status st=save_esps_label(outf, s, evaluate_ff);
125 
126  if (outf != &cout)
127  delete outf;
128 
129  return st;
130 }
131 
132 EST_write_status save_esps_label(ostream *outf,
133  const EST_Relation &s,
134  bool evaluate_ff)
135 {
136  EST_Item *ptr;
137 
138  *outf << "separator ;\n";
139  if (!s.f.present("nfields"))
140  *outf << "nfields 1\n";
141 
143  for (p.begin(s.f); p; ++p)
144  *outf << p->k << " " << p->v << endl;
145 
146  *outf << "#\n";
147 /* if (f("timing_style") == "event")
148  *outf << "timing_style event\n";
149  else if (f("timing_style") == "unit")
150  *outf << "timing_style unit\n";
151 */
152 
153  for (ptr = s.head(); ptr != 0; ptr = inext(ptr))
154  {
155  *outf << "\t";
156  outf->precision(5);
157  outf->setf(ios::scientific, ios::floatfield);
158  outf->width(8);
159  // outf->fill('0');
160  if (s.f("timing_style","0") == "event")
161  *outf << ptr->F("time",0);
162  else
163  *outf << ptr->F("end",0);
164 
165  *outf << " 26 \t" << ptr->S("name","0");
166 
167  EST_Features f2;
168  f2 = ptr->features();
169  f2.remove("name");
170  f2.remove("end");
171  if (evaluate_ff)
172  evaluate(ptr,f2);
173 
174  if (f2.length() > 0)
175  {
176  *outf << " ; ";
177  f2.save(*outf);
178  }
179  *outf << endl;
180  }
181 
182  return write_ok;
183 }
184 
185 EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
186 {
187  // This function reads OGI style label files. The start, end
188  // time and names of the labels are mandatory.
189  EST_String key, val;
190  float sr;
191  int isr;
192 
193  // set up the character constant values for this stream
194  ts.set_SingleCharSymbols(";");
195 
196  // Skip over header
197 
198  while(!ts.eof())
199  {
200  if ((ts.peek().col() == 0) && (ts.peek() == "END"))
201  {
202  if (ts.peek() == "END")
203  { // read rest of header
204  ts.get();
205  ts.get();
206  ts.get();
207  }
208  break;
209  }
210  key = ts.get().string();
211  val = ts.get().string();
212  }
213 
214  sr = 1000.0 / atof(val);
215  isr = (int)sr;
216 
217  if (ts.eof())
218  {
219  cerr << "Error: couldn't find header in label file "
220  << ts.filename() << endl;
221  return wrong_format;
222  }
223 
224  if (read_label_portion(ts, s, isr) == misc_read_error)
225  {
226  cerr << "error: in label file " << ts.filename() << " at line " <<
227  ts.linenum() << endl;
228  return misc_read_error;
229  }
230  return format_ok;
231 }
232 
233 EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
234 {
235  // This function reads label files in the form of simple word strings
236  // with no timing information.
237  EST_Item *item;
238 
239  while (!ts.eof())
240  {
241  item = s.append();
242  item->set("name",(EST_String)ts.get());
243  item->set("end",0.0);
244  }
245 
246  return format_ok;
247 }
248 
// Convert a decimal digit string to a time by dividing it by `sample`.
//
// Label files that use 100 nanosecond time units produce numbers too
// large for an int.  Strings shorter than 15 characters go through
// atof(); longer ones are accumulated digit by digit, with each digit
// already divided by `sample`, after skipping leading whitespace and
// stopping at the first non-digit.
//
//   s       NUL-terminated numeric string
//   sample  number of units per second
// Returns the converted value as a float.
static float convert_long_num_string_to_time(const char *s,int sample)
{
    if (strlen(s) < 15)
        return atof(s)/sample;

    const char *cursor = s;

    // step over leading blanks, tabs and line ends
    while ((*cursor != '\0') && (strchr(" \n\r\t",*cursor) != NULL))
        ++cursor;

    double total = 0;
    while ((*cursor != '\0') && (*cursor >= '0') && (*cursor <= '9'))
    {
        const double digit = *cursor - '0';
        total = total*10;
        total += (digit/(double)sample);
        ++cursor;
    }
    return total;
}
276 
277 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
278  int sample)
279 {
280  EST_Item *item;
281  float hend;
282  EST_String str;
283 
284  while(!ts.eof())
285  {
286  str = ts.get().string();
287  if (str == ".")
288  return format_ok;
289 
290  item = s.append();
291 
292  str = ts.get().string();
293  hend = convert_long_num_string_to_time(str,sample);
294 
295  item->set("end",hend); // time
296  item->set("name",ts.get().string()); // name
297 
298  if (!ts.eoln())
299  item->set("rest_lab",ts.get_upto_eoln().string());
300  }
301 
302  return format_ok;
303 }
304 
305 EST_read_status load_sample_label(EST_TokenStream &ts,
306  EST_Relation &s, int sample)
307 {
308 
309  if (sample == 0) // maybe this should be an error
310  sample = DEF_SAMPLE_RATE;
311 
312  // set up the character constant values for this stream
313  ts.set_SingleCharSymbols(";");
314 
315  s.clear();
316  if (read_label_portion(ts, s, sample) == misc_read_error)
317  {
318  cerr << "error: in label file " << ts.filename() << " at line " <<
319  ts.linenum() << endl;
320  return misc_read_error;
321  }
322  return format_ok;
323 }
324 
325 EST_write_status save_htk_label(const EST_String &filename,
326  const EST_Relation &a)
327 {
328  ostream *outf;
329  if (filename == "-")
330  outf = &cout;
331  else
332  outf = new ofstream(filename);
333 
334  if (!(*outf))
335  {
336  cerr << "save_htk_label: can't open label output file \"" <<
337  filename << "\"" << endl;
338  return write_fail;
339  }
340 
341  EST_write_status s = save_htk_label(outf, a);
342 
343 
344  if (outf != &cout)
345  delete outf;
346 
347  return s;
348 }
349 
350 EST_write_status save_htk_label(ostream *outf,
351  const EST_Relation &a)
352 {
353  EST_Item *ptr;
354  float end,start;
355 
356  outf->precision(6);
357 
358  start = end = 0;
359  for (ptr = a.head(); ptr != 0; ptr = inext(ptr))
360  {
361  outf->width(15);
362  cout.setf(ios::left,ios::adjustfield);
363  *outf << (int)(start * HTK_UNITS_PER_SECOND);
364  outf->width(15);
365  end = ptr->F("end",0.0);
366  *outf << (int)(end * HTK_UNITS_PER_SECOND);
367  *outf << " " << ptr->name() << endl;
368  start = end;
369  }
370 
371  return write_ok;
372 }
373 
#if 0
// Everything up to the matching #endif is compiled out.  These two
// savers still use EST_Stream_Item, a type that the rest of this file
// does not use.

// Write each label's name and duration (times 1000) to `filename`, or to
// standard output when `filename` is "-".  The first line is tagged
// "(0,140)" and the last "(99,80)".
EST_write_status save_label_spn(const EST_String &filename,
                                const EST_Relation &a)
{
    EST_Stream_Item *ptr;
    ostream *outf;

    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_label_spn: can't open label output file \""
             << filename << "\"" << endl;
        return write_fail;
    }

    // opening line
    ptr = a.head();
    outf->precision(3);
    outf->setf(ios::left, ios::adjustfield);
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::scientific, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;

    // every label that has a successor
    while (inext(ptr) != 0)
    {
        outf->precision(3);
        outf->setf(ios::left, ios::adjustfield);
        outf->width(8);
        *outf << ptr->name();
        outf->setf(ios::scientific, ios::floatfield);
        outf->width(8);
        *outf << (ptr->dur() * 1000.0) << endl;
        ptr = inext(ptr);
    }

    // closing line
    //    outf->precision(3);
    //    outf->setf(ios::left, ios::adjustfield);
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::scientific, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;

    if (outf != &cout)
        delete outf;

    return write_ok;
}

// Write only the label names to `filename`, or to standard output when
// `filename` is "-".  Names are separated by newlines when `features` is
// neither "" nor "OneLine", and by spaces otherwise.
EST_write_status save_label_names(const EST_String &filename,
                                  const EST_Relation &a,
                                  const EST_String &features)
{
    EST_Stream_Item *ptr;
    ostream *outf;

    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_label_name: can't open label output file \""
             << filename << "\"" << endl;
        return misc_write_error;
    }

    ptr = a.head();
    while (inext(ptr) != 0)
    {
        *outf << ptr->name();
        if ((features != "") && (features != "OneLine"))
            *outf << endl;
        else
            *outf << " ";
        ptr = inext(ptr);
    }

    *outf << ptr->name() << endl;

    if (outf != &cout)
        delete outf;
    return write_ok;
}
#endif
461 
462 EST_write_status save_RelationList(const EST_String &filename,
463  const EST_RelationList &plist,
464  int time, int path)
465 {
466  EST_Litem *p;
467  EST_Item *ptr;
468  EST_String outname;
469  float start,end;
470 
471  ostream *outf;
472  if (filename == "-")
473  outf = &cout;
474  else
475  outf = new ofstream(filename);
476 
477  if (!(*outf))
478  {
479  cerr << "save_StreamList: can't open MLF output file \""
480  << filename << "\"\n";
481  return write_fail;
482  }
483 
484  *outf << "#!MLF!#\n"; // MLF header/identifier
485  outf->precision(6);
486 
487  start = end = 0;
488  for (p = plist.head(); p != 0; p = p->next())
489  {
490  outname = path ? plist(p).name() : basename(plist(p).name());
491  *outf << "\"*/" << outname<<"\"\n";
492  for (ptr = plist(p).head(); ptr != 0; ptr = inext(ptr))
493  {
494  if (time)
495  {
496  outf->width(15);
497  cout.setf(ios::left,ios::adjustfield);
498  *outf << (int)(start * HTK_UNITS_PER_SECOND);
499  outf->width(15);
500  end = ptr->F("end",0.0);
501  *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
502  start = end;
503  }
504  *outf << ptr->S("name","0") << endl;
505  }
506  *outf << ".\n";
507  }
508 
509  if (outf != &cout)
510  delete outf;
511  return write_ok;
512 }
513 
514 EST_write_status save_WordList(const EST_String &filename,
515  const EST_RelationList &plist,
516  int style)
517 {
518  EST_Litem *p;
519  EST_Item *ptr;
520 
521  ostream *outf;
522  if (filename == "-")
523  outf = &cout;
524  else
525  outf = new ofstream(filename);
526 
527  if (!(*outf))
528  {
529  cerr << "save:WordList: can't open WordList output file \""
530  << filename << "\"\n";
531  return write_fail;
532  }
533 
534  for (p = plist.head(); p != 0; p = p->next())
535  {
536  for (ptr = plist(p).head(); inext(ptr) != 0; ptr = inext(ptr))
537  {
538  *outf << ptr->name();
539  if (style == 0)
540  *outf << endl;
541  else
542  *outf << " ";
543  }
544  if (ptr != 0)
545  *outf << ptr->name() << endl;
546  }
547 
548  if (outf != &cout)
549  delete outf;
550  return write_ok;
551 }
552 
553 EST_write_status save_ind_RelationList(const EST_String &filename,
554  const EST_RelationList &plist,
555  const EST_String &features,
556  int path)
557 {
558  EST_Litem *p;
559  EST_String outname;
560  (void) filename;
561  (void) features;
562 
563  for (p = plist.head(); p != 0; p = p->next())
564  {
565  outname = path ? plist(p).name() : basename(plist(p).name());
566  if (plist(p).save(outname,false) != write_ok)
567  return misc_write_error;
568  }
569 
570  return write_ok;
571 }
572 
573 EST_read_status load_RelationList(const EST_String &filename,
574  EST_RelationList &plist)
575 {
576  EST_TokenStream ts;
577  EST_String fns, name;
578 
579  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
580  {
581  cerr << "Can't open label input file " << filename << endl;
582  return misc_read_error;
583  }
584  // set up the character constant values for this stream
585  ts.set_SingleCharSymbols(";");
586 
587  // Skip over header
588  if (ts.get().string() != "#!MLF!#")
589  {
590  cerr << "Not MLF file\n";
591  return wrong_format;
592  }
593 
594  while(!ts.eof())
595  {
596  // put filename in as stream name. The filename is usually surrounded
597  // by quotes, so remove these.
598  fns = ts.get().string();
599  strip_quotes(fns);
600  EST_Relation s(fns);
601  s.f.set("name", fns); // simonk
602  plist.append(s);
603 
604  if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
605  {
606  cerr << "error: in reading MLF file\n";
607  cerr << "section for file " << fns <<
608  " at line " << ts.linenum() << " is badly formatted\n";
609 
610  return misc_read_error;
611  }
612  }
613 
614  return format_ok;
615 }
616 
617 static void pad_ends(EST_Relation &s, float length)
618 {
619  // add evenly spaced dummy end values to Relation
620  EST_Item *p;
621  int i;
622 
623  for (i = 0, p = s.head(); p; p = inext(p), ++i)
624  p->set("end",(length * float(i)/float(s.length())));
625 }
626 
627 EST_read_status read_RelationList(EST_RelationList &plist,
628  EST_StrList &files, EST_Option &al)
629 {
630  EST_Litem *p, *plp;
631 
632  if (al.val("-itype", 0) == "mlf")
633  {
634  if (load_RelationList(files.first(), plist) != format_ok)
635  exit (-1);
636  }
637  else
638  for (p = files.head(); p; p = p->next())
639  {
640  EST_Relation s(files(p));
641  plist.append(s);
642  plp = plist.tail();
643  if (al.present("-itype"))
644  {
645  if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
646  exit (-1);
647  }
648  else if (plist(plp).load(files(p)) != format_ok)
649  exit (-1);
650  if ((al.val("-itype", 0) == "words") && (al.present("-length")))
651  pad_ends(s, al.fval("-length"));
652 
653  }
654 
655  return format_ok;
656 }
EST_Features::set
void set(const EST_String &name, int ival)
Definition: EST_Features.h:185
EST_Option
Definition: EST_Option.h:50
EST_Token::col
int col(void) const
Line position in original EST_TokenStream.
Definition: EST_Token.h:184
EST_TokenStream::eof
int eof()
end of file
Definition: EST_Token.h:356
EST_Features
Definition: EST_Features.h:62
EST_Item::S
const EST_String S(const EST_String &name) const
Definition: EST_Item.h:143
EST_Relation::f
EST_Features f
Definition: EST_Relation.h:103
EST_TList
Definition: EST_TList.h:59
EST_TList::first
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:146
EST_TokenStream
Definition: EST_Token.h:235
EST_Relation::length
int length() const
Definition: EST_Relation.cc:137
EST_TList::append
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:191
EST_TList::last
const T & last() const
return const reference to last item in list
Definition: EST_TList.h:149
EST_TokenStream::set_quotes
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
Definition: EST_Token.h:347
EST_TKVL::present
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_TokenStream::linenum
int linenum(void) const
returns line number of EST_TokenStream
Definition: EST_Token.h:354
EST_Option::fval
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:98
EST_Item::F
const float F(const EST_String &name) const
Definition: EST_Item.h:134
EST_Features::length
int length() const
Definition: EST_Features.h:250
EST_Features::remove
void remove(const EST_String &name)
Definition: EST_Features.h:246
EST_Token
Definition: EST_Token.h:73
EST_TokenStream::get_upto_eoln
EST_Token get_upto_eoln(void)
get up to {\tt s} in end of line as a single token.
Definition: EST_Token.cc:516
EST_TokenStream::set_SingleCharSymbols
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:338
EST_Relation::clear
void clear()
Definition: EST_Relation.cc:153
EST_Features::present
int present(const EST_String &name) const
Definition: EST_Features.cc:147
EST_Regex
Definition: EST_Regex.h:55
EST_TokenStream::eoln
int eoln()
end of line
Definition: EST_Token.cc:818
EST_TokenStream::open
int open(const EST_String &filename)
open an EST_TokenStream for a file.
Definition: EST_Token.cc:200
EST_UItem
Definition: EST_UList.h:51
EST_Relation
Definition: EST_Relation.h:67
EST_TStructIterator
Definition: EST_TIterator.h:74
EST_TokenStream::get
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:486
EST_String
Definition: EST_String.h:70
EST_Features::save
EST_write_status save(ostream &outf) const
save features in already opened ostream
Definition: EST_features_io.cc:111
EST_Features::load
EST_read_status load(EST_TokenStream &ts)
load features from already opened EST_TokenStream
Definition: EST_features_io.cc:248
EST_Item::set
void set(const EST_String &name, int ival)
Definition: EST_Item.h:179
EST_Relation::head
EST_Item * head() const
Definition: EST_Relation.h:125
EST_TokenStream::filename
const EST_String filename() const
The originating filename (if there is one)
Definition: EST_Token.h:372
EST_String::length
int length(void) const
Length of string (not length of underlying chunk)
Definition: EST_String.h:241
EST_TokenStream::peek
EST_Token & peek(void)
peek at next token
Definition: EST_Token.cc:830
EST_TKVL::val
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
EST_Item
Definition: EST_Item.h:82