diff -Nru festival-2.4~release/config/project.mak festival-2.5.0/config/project.mak --- festival-2.4~release/config/project.mak 2014-12-20 15:31:03.000000000 +0000 +++ festival-2.5.0/config/project.mak 2017-12-25 15:05:54.000000000 +0000 @@ -40,8 +40,8 @@ PROJECT_NAME = Festival Speech Synthesis System PROJECT_PREFIX = FESTIVAL -PROJECT_VERSION = 2.4 -PROJECT_DATE = December 2014 +PROJECT_VERSION = 2.5.0 +PROJECT_DATE = December 2017 PROJECT_STATE = release # config files of projects we depend on diff -Nru festival-2.4~release/COPYING festival-2.5.0/COPYING --- festival-2.4~release/COPYING 2014-11-16 17:11:09.000000000 +0000 +++ festival-2.5.0/COPYING 2017-09-04 15:54:08.000000000 +0000 @@ -4,7 +4,7 @@ The Festival Speech Synthesis System Centre for Speech Technology Research University of Edinburgh, UK - Copyright (c) 1996-2014 + Copyright (c) 1996-2017 All Rights Reserved. Permission is hereby granted, free of charge, to use and distribute diff -Nru festival-2.4~release/debian/changelog festival-2.5.0/debian/changelog --- festival-2.4~release/debian/changelog 2017-11-05 18:38:08.000000000 +0000 +++ festival-2.5.0/debian/changelog 2018-01-18 21:02:29.000000000 +0000 @@ -1,3 +1,12 @@ +festival (1:2.5.0-1) unstable; urgency=medium + + * New upstream release + * Bump Build-Depends on libestools-dev to prevent FTBFS + * Enable hardening + * Multiple minor improvements to the packaging + + -- Paul Gevers Thu, 18 Jan 2018 22:02:29 +0100 + festival (1:2.4~release-4) unstable; urgency=medium [ Samuel Thibault ] diff -Nru festival-2.4~release/debian/clean festival-2.5.0/debian/clean --- festival-2.4~release/debian/clean 2016-09-05 06:34:28.000000000 +0000 +++ festival-2.5.0/debian/clean 2018-01-18 21:02:29.000000000 +0000 @@ -1 +1,29 @@ +bin/festival* confdefs.h +config.cache +config.guess +config.log +config.status +config.sub +config/modincludes.inc +config/system.mak +doc/*.1 +doc/festfeat.texi +doc/festfunc.texi +doc/festival.aux +doc/festival.cp +doc/festival.cps 
+doc/festival.dvi +doc/festival.fn +doc/festival.ky +doc/festival.log +doc/festival.pg +doc/festival.ps +doc/festival.toc +doc/festival.tp +doc/festival.vr +doc/festvars.texi +doc/html/ +doc/info/ +src/modules/hts21_engine/*.o +src/modules/hts21_engine/.buildlib_Festival diff -Nru festival-2.4~release/debian/compat festival-2.5.0/debian/compat --- festival-2.4~release/debian/compat 2016-09-05 06:34:28.000000000 +0000 +++ festival-2.5.0/debian/compat 2018-01-18 21:02:29.000000000 +0000 @@ -1 +1 @@ -9 +11 diff -Nru festival-2.4~release/debian/control festival-2.5.0/debian/control --- festival-2.4~release/debian/control 2017-11-05 17:11:48.000000000 +0000 +++ festival-2.5.0/debian/control 2018-01-18 21:02:29.000000000 +0000 @@ -6,10 +6,10 @@ Jean-Philippe MENGUAL , Kumar Appaiah , Jaldhar H. Vyas , + Paul Gevers , Samuel Thibault -Build-Depends: debhelper (>= 9.20150628), - autotools-dev, - libestools-dev (>= 1:2.4~release-3~), +Build-Depends: debhelper (>= 11~), + libestools-dev (>= 1:2.5~), libncurses5-dev, texinfo, texlive-latex-base, @@ -19,6 +19,7 @@ Homepage: http://www.cstr.ed.ac.uk/projects/festival/ Vcs-Git: https://anonscm.debian.org/git/tts/festival.git Vcs-Browser: https://anonscm.debian.org/git/tts/festival.git +Rules-Requires-Root: no Package: festival Architecture: any @@ -27,15 +28,6 @@ alsa-utils [linux-any], lsb-base (>= 3.0-10) Recommends: festvox-kallpc16k | festival-voice -Breaks: festvox-rablpc8k (<< 1.4.0-2), - festvox-rablpc16k (<< 1.4.0-2), - festvox-kdlpc16k (<< 1.4.0-4), - festvox-kdlpc8k (<< 1.4.0-5), - festvox-don (<< 1.4.0-3), - festvox-ellpc11k (<< 1.4.0-1), - festlex-cmu (<< 1.4.0-3), - festlex-oald (<< 1.4.0-2), - festlex-poslex (<< 1.4.0-3) Suggests: pidgin-festival, festival-freebsoft-utils Description: General multi-lingual speech synthesis system Festival offers a full text to speech system with various APIs, as well an diff -Nru festival-2.4~release/debian/copyright festival-2.5.0/debian/copyright --- 
festival-2.4~release/debian/copyright 2015-05-03 17:33:42.000000000 +0000 +++ festival-2.5.0/debian/copyright 2018-01-18 21:02:29.000000000 +0000 @@ -8,7 +8,7 @@ See ACKNOWLEDGMENTS for more details -Copyright (C) 1996-2006 Centre for Speech Technology Research, +Copyright (C) 1996-2017 Centre for Speech Technology Research, University of Edinburgh, UK License: diff -Nru festival-2.4~release/debian/festival.docs festival-2.5.0/debian/festival.docs --- festival-2.4~release/debian/festival.docs 2016-09-05 06:34:28.000000000 +0000 +++ festival-2.5.0/debian/festival.docs 2018-01-18 21:02:29.000000000 +0000 @@ -1,2 +1,2 @@ -README +README.md ACKNOWLEDGMENTS diff -Nru festival-2.4~release/debian/patches/17-performance-hts.diff festival-2.5.0/debian/patches/17-performance-hts.diff --- festival-2.4~release/debian/patches/17-performance-hts.diff 2016-09-05 06:34:28.000000000 +0000 +++ festival-2.5.0/debian/patches/17-performance-hts.diff 2018-01-18 21:02:29.000000000 +0000 @@ -39,7 +39,7 @@ FILE *fp = fopen(name, opt); if (fp == NULL) { -@@ -65,6 +71,50 @@ +@@ -65,6 +71,50 @@ static FILE *Getfp(const char *name, con return (fp); } @@ -90,7 +90,7 @@ /* HTS_Synthesize_Utt: generate speech from utt by using hts_engine API */ static LISP HTS_Synthesize_Utt(LISP utt) { EST_Utterance *u = get_c_utt(utt); -@@ -72,17 +122,23 @@ +@@ -72,17 +122,23 @@ static LISP HTS_Synthesize_Utt(LISP utt) LISP hts_engine_params = NIL; LISP hts_output_params = NIL; @@ -117,7 +117,7 @@ /* get params */ hts_engine_params = siod_get_lval("hts_engine_params", -@@ -92,54 +148,80 @@ +@@ -92,54 +148,80 @@ static LISP HTS_Synthesize_Utt(LISP utt) "festopt_hts_engine: no output parameters set for module"); /* get model file names */ @@ -223,7 +223,7 @@ /* close output file pointers */ if (rawfp != NULL) -@@ -147,36 +229,40 @@ +@@ -147,36 +229,40 @@ static LISP HTS_Synthesize_Utt(LISP utt) if (durfp != NULL) fclose(durfp); @@ -263,7 +263,7 @@ + r->load("tmp.lab", ts_label, "htk"); for (o = r->first(), s = 
u->relation("Segment")->first(); - (o != NULL) && (s != NULL); o = o->next(), s = s->next()) + (o != NULL) && (s != NULL); o = inext(o), s = inext(s)) if (o->S("name").before("+").after("-").matches(s->S("name"))) s->set("end", o->F("end")); else @@ -278,16 +278,7 @@ return utt; } -@@ -188,7 +274,7 @@ - void HTS_get_copyright(char *str) { - int i, nCopyright = HTS_NCOPYRIGHT; - char url[] = HTS_URL, version[] = HTS_VERSION; -- char *copyright[] = { HTS_COPYRIGHT }; -+ const char *copyright[] = { HTS_COPYRIGHT }; - - sprintf(str, - "\nThe HMM-Based Speech Synthesis Engine \"hts_engine API\"\n"); -@@ -211,10 +297,10 @@ +@@ -211,10 +297,10 @@ void HTS_get_copyright(char *str) { void festival_hts_engine_init(void) { char buf[4096]; @@ -1097,7 +1088,7 @@ { int i; -@@ -117,7 +117,7 @@ +@@ -117,7 +117,7 @@ static void HTS_Label_load(HTS_Label * l /* parse label file */ while (HTS_get_token_from_fp(fp, buff)) { @@ -1106,7 +1097,7 @@ break; label->size++; -@@ -169,7 +169,7 @@ +@@ -169,7 +169,7 @@ void HTS_Label_load_from_strings(HTS_Lab } /* copy label */ for (i = 0; i < num_lines; i++) { diff -Nru festival-2.4~release/debian/rules festival-2.5.0/debian/rules --- festival-2.4~release/debian/rules 2016-09-05 06:34:28.000000000 +0000 +++ festival-2.5.0/debian/rules 2018-01-18 21:02:29.000000000 +0000 @@ -1,7 +1,6 @@ #!/usr/bin/make -f -FT := $(shell pwd)/debian/festival -FDT := $(shell pwd)/debian/festival-dev +export DEB_BUILD_MAINT_OPTIONS = hardening=+all override_dh_auto_configure: #Avoid conflicting with upstreams build system @@ -10,12 +9,6 @@ #Upstream states test is only for their local development not a functional test override_dh_auto_build: -ifneq "$(wildcard /usr/share/misc/config.sub)" "" - cp -f /usr/share/misc/config.sub config.sub -endif -ifneq "$(wildcard /usr/share/misc/config.guess)" "" - cp -f /usr/share/misc/config.guess config.guess -endif $(MAKE) cd doc && $(MAKE) festival.info festival.html festival.ps @@ -28,18 +21,6 @@ find . 
-name make.include -print0 | xargs -0r $(RM) find bin/ \( -name SCCS -o -name RCS -o -name CVS \) -prune \ -o -type f ! -name Makefile -print0 | xargs -0r $(RM) - rm -rf doc/*.1 doc/festfeat.texi doc/festvars.texi doc/festfunc.texi \ - doc/info src/modules/hts21_engine/*.o \ - src/modules/hts21_engine/.buildlib_Festival - rm -f config/modincludes.inc config/system.mak - rm -f bin/festival* - rm -f config.sub config.guess config.log config.status config.cache - -rmdir src/modules/hts21_engine - rm -f doc/festival.ps doc/festival.dvi doc/festival.cps - rm -rf doc/html - rm -rf doc/festival.aux doc/festival.cp doc/festival.fn \ - doc/festival.ky doc/festival.log doc/festival.pg \ - doc/festival.toc doc/festival.tp doc/festival.vr dh_clean override_dh_installinit: diff -Nru festival-2.4~release/debian/watch festival-2.5.0/debian/watch --- festival-2.4~release/debian/watch 2016-09-05 06:34:28.000000000 +0000 +++ festival-2.5.0/debian/watch 2018-01-18 21:02:29.000000000 +0000 @@ -1,3 +1,3 @@ version=3 -opts="uversionmangle=s/-beta/~beta/;s/-release/~release/" \ +opts="uversionmangle=s/-beta/~beta/;s/-release//" \ http://www.festvox.org/packed/festival/([\d.]*)/festival-(.*)\.(?:zip|tgz|tbz2|txz|tar\.(?:gz|bz2|xz)) diff -Nru festival-2.4~release/INSTALL festival-2.5.0/INSTALL --- festival-2.4~release/INSTALL 2014-12-11 15:24:37.000000000 +0000 +++ festival-2.5.0/INSTALL 2017-12-25 15:05:20.000000000 +0000 @@ -17,10 +17,10 @@ In order to compile Festival you first need the following source packages -`festival-2.4-release.tar.gz' +`festival-2.5-release.tar.gz' Festival Speech Synthesis System source -`speech_tools-2.4-release.tar.gz' +`speech_tools-2.5-release.tar.gz' The Edinburgh Speech Tools Library `festlex_NAME.tar.gz' @@ -142,7 +142,7 @@ particular system. In most cases you need only type gmake and the system will configure itself and compile, (note you need to -have compiled the Edinburgh Speech Tools `speech_tools-1.2.4' first. 
+have compiled the Edinburgh Speech Tools `speech_tools-1.2.5' first. In some case hand configure is require. All of the configuration choice are held in the file `config/config' @@ -179,6 +179,10 @@ festlex_POSLEX.tar.gz festvox_kallpc16k.tar.gz +On install the base voice (onely one really old one and only US English) + + make default_voices + Note that the single most common reason for problems in compilation and linking found amongst the beta testers was a bad installation of GNU C++. If you get many strange errors in G++ library header files or link diff -Nru festival-2.4~release/lib/lts_build.scm festival-2.5.0/lib/lts_build.scm --- festival-2.4~release/lib/lts_build.scm 2014-03-11 18:48:09.000000000 +0000 +++ festival-2.5.0/lib/lts_build.scm 2017-09-04 15:54:08.000000000 +0000 @@ -71,28 +71,39 @@ (pp (intern (string-append phone "-" nphone)))) (assoc_string pp (cdr ll)))) -(define (find-aligns phones letters) +(define (find-aligns phones letters ecount) "(find-aligns phones letters) Find all feasible alignments." - (let ((r nil)) + (let ((r nil) (lp nil) (ll nil)) (cond ((and (null (cdr phones)) (null (cdr letters)) (equal? (car phones) (car letters)) (equal? '# (car phones))) (list (list (cons '# '#)))) ;; valid end match + ;; Give up with alignment if its out of sync too much + ((and (> (sqrt (* ecount ecount)) 3)) + r) + ((and nil (> 4 (set! lp (length phones))) + (> (sqrt (* (- lp (set! ll (length letters))) + (- lp ll))) + 2)) +; (format t "out of sync %d %l %l\n" +; (- (length phones) (length letters)) +; phones letters) + r) (t (if (valid-pair '_epsilon_ (car letters)) (set! r (mapcar (lambda (p) (cons (cons '_epsilon_ (car letters)) p)) - (find-aligns phones (cdr letters))))) + (find-aligns phones (cdr letters) (+ 1 ecount))))) (if (valid-pair (car phones) (car letters)) (set! 
r (append r (mapcar (lambda (p) (cons (cons (car phones) (car letters)) p)) - (find-aligns (cdr phones) (cdr letters)))))) + (find-aligns (cdr phones) (cdr letters) ecount))))) ;; Hmm, change this to always check doubles (if (valid-pair-e (car phones) (car (cdr phones)) (car letters)) (set! r @@ -104,11 +115,11 @@ (car (cdr phones)))) (car letters)) p)) (find-aligns (cdr (cdr phones)) - (cdr letters)))))) + (cdr letters) (+ ecount 1)))))) r)))) (define (findallaligns phones letters) - (let ((a (find-aligns phones letters))) + (let ((a (find-aligns phones letters 0))) (if (null a) (begin (set! failedaligns (+ 1 failedaligns)) @@ -621,7 +632,8 @@ ((string-equal ltype "utf8") (format ofd "( %l %l (" - (utf8explode (car entry)) + ;; Note sure downcase is generic enough for utf8 + (mapcar downcase (utf8explode (car entry))) (cadr entry))) ((string-equal ltype "asis") (format ofd @@ -629,7 +641,7 @@ (car entry) (cadr entry))) (t - (format ofd + (format ofd "( \"%s\" %l (" (downcase (car entry)) (cadr entry)))) diff -Nru festival-2.4~release/lib/multisyn/multisyn.scm festival-2.5.0/lib/multisyn/multisyn.scm --- festival-2.4~release/lib/multisyn/multisyn.scm 2013-07-03 13:43:35.000000000 +0000 +++ festival-2.5.0/lib/multisyn/multisyn.scm 2017-09-04 15:54:08.000000000 +0000 @@ -40,6 +40,8 @@ (require 'multisyn_pauses) (require 'target_cost) + + ;; use a global parameter to specify which UnitSelection voice ;; to use to synthesise a given utterance for now, because the ;; standard Festival synthesis mainline doesn't accept a voice @@ -47,6 +49,7 @@ (defvar currentMultiSynVoice nil) (defvar relp t) (defvar flattenVoice nil) +(defvar MultiSyn_module_hooks nil) ; extract utt list from a .data file (define (load_utt_list filename) @@ -65,7 +68,6 @@ (defSynthType MultiSyn ;(print "Multisyn unit selection synthesis") - (defvar MultiSyn_module_hooks nil) (Param.def "unisyn.window_name" "hanning") (Param.def "unisyn.window_factor" 1.0) ;; Unisyn requires these to be set. 
@@ -108,6 +110,9 @@ ) + + + ; target cost scheme code (define (targetcost it1 it2) (Default_Target_Cost it1 it2)) @@ -177,8 +182,6 @@ voice)) - - (define (define_current_voice_reset) "(define_current_voice_reset) Re-define (current_voice_reset) correctly." @@ -192,4 +195,6 @@ (Param.set 'unisyn.window_symmetric 1)) + (provide 'multisyn) + diff -Nru festival-2.4~release/lib/pauses.scm festival-2.5.0/lib/pauses.scm --- festival-2.4~release/lib/pauses.scm 2010-02-04 03:24:33.000000000 +0000 +++ festival-2.5.0/lib/pauses.scm 2017-09-04 15:54:08.000000000 +0000 @@ -41,7 +41,8 @@ (rval rval) ;; new style (t (Classic_Pauses utt)))) - (Pause_optional_deleting_B_X utt)) + (Pause_optional_deleting_B_X utt) + utt) (define (Classic_Pauses utt) "(Pauses UTT) diff -Nru festival-2.4~release/lib/sable-mode.scm festival-2.5.0/lib/sable-mode.scm --- festival-2.4~release/lib/sable-mode.scm 2014-12-02 21:17:06.000000000 +0000 +++ festival-2.5.0/lib/sable-mode.scm 2017-09-04 15:54:08.000000000 +0000 @@ -300,6 +300,16 @@ (")EMPH" (ATTLIST UTT) (set! xxml_word_features (sable_pop_word_features)) UTT) + ("(BC" (ATTLIST UTT) + (xxml_synth UTT) + (format t "pre BC\n") + (set! bc_feats ATTLIST) + nil) + (")BC" (ATTLIST UTT) + (xxml_synth UTT) + (format t "post BC\n") + (set! 
bc_feats nil) + nil) ("(PITCH" (ATTLIST UTT) ;; Status: probably complete ;; At present festival requires an utterance break here diff -Nru festival-2.4~release/lib/Sable.v0_2.dtd festival-2.5.0/lib/Sable.v0_2.dtd --- festival-2.4~release/lib/Sable.v0_2.dtd 2001-04-04 11:55:20.000000000 +0000 +++ festival-2.5.0/lib/Sable.v0_2.dtd 2017-09-04 15:54:08.000000000 +0000 @@ -58,6 +58,7 @@ SAYAS | LANGUAGE | SPEAKER | + BC | DIV"> @@ -90,6 +91,18 @@ + + diff -Nru festival-2.4~release/lib/synthesis.scm festival-2.5.0/lib/synthesis.scm --- festival-2.4~release/lib/synthesis.scm 2001-04-04 13:12:35.000000000 +0000 +++ festival-2.5.0/lib/synthesis.scm 2017-09-04 15:54:08.000000000 +0000 @@ -273,6 +273,7 @@ ;; All the rest (us_generate_wave utt (Parameter.get 'us_sigpr) 'analysis_period))) + utt ) (defSynthType None @@ -365,19 +366,30 @@ and then those demanded by the voice. After modules have been applied synth_hooks are applied to allow extra manipulation. [see Utterance types]" + (let ((uttr)) + (set! uttr (apply_hooks before_synth_hooks utt)) - (apply_hooks before_synth_hooks utt) - - (let ((type (utt.type utt))) - (let ((definition (assoc type UttTypes))) - (if (null? definition) - (error "Unknown utterance type" type) - (let ((body (eval (cons 'lambda - (cons '(utt) (cdr definition)))))) - (body utt))))) - - (apply_hooks after_synth_hooks utt) - utt) + (let ((type (utt.type uttr))) + (let ((definition (assoc type UttTypes))) + (if (null? definition) + (error "Unknown utterance type" type) + (let ((body (eval (cons 'lambda + (list '(utt) + (utttype_recursify + (reverse (cdr definition)))))))) + (set! uttr (body uttr))))) + + (apply_hooks after_synth_hooks uttr)))) + +(define (utttype_recursify definition) + "(utttype_recursify definition) +Change the linear list of module names into a recursive list so you can +truly modify the utterance within the synthesis process." 
+ (cond + ((null definition) nil) + ((null (cdr definition)) (car definition)) + (t + (list (caar definition) (utttype_recursify (cdr definition)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; diff -Nru festival-2.4~release/lib/tilt.scm festival-2.5.0/lib/tilt.scm --- festival-2.4~release/lib/tilt.scm 2003-04-20 17:13:41.000000000 +0000 +++ festival-2.5.0/lib/tilt.scm 2017-09-04 15:54:08.000000000 +0000 @@ -340,7 +340,7 @@ "Syllable.lisp_get_rhyme_length Length from start of the vowel to end of syllable.") (define (get_rhyme_length syl) - (- (item.feat syl 'end) + (- (item.feat syl 'R:SylStructure.daughtern.end) (item.feat syl 'vowel_start syl))) (def_feature_docstring 'SylStructure.lisp_get_onset_length diff -Nru festival-2.4~release/Makefile festival-2.5.0/Makefile --- festival-2.4~release/Makefile 2003-01-25 00:26:47.000000000 +0000 +++ festival-2.5.0/Makefile 2017-12-25 15:01:31.000000000 +0000 @@ -2,7 +2,7 @@ ## ## ## Centre for Speech Technology Research ## ## University of Edinburgh, UK ## -## Copyright (c) 1996-2002 ## +## Copyright (c) 1996-2017 ## ## All Rights Reserved. ## ## ## ## Permission is hereby granted, free of charge, to use and distribute ## @@ -34,7 +34,7 @@ ## The Festival Speech Synthesis System ## ## ## ## Authors: Alan W Black, Paul Taylor, Richard Caley and others ## -## Date: January 2003 ## +## Date: December 2017 ## ## ## ########################################################################### TOP=. 
@@ -43,7 +43,7 @@ ALL_DIRS=config $(BUILD_DIRS) testsuite CONFIG=configure configure.in config.sub config.guess \ missing install-sh mkinstalldirs -FILES = Makefile README ACKNOWLEDGMENTS NEWS COPYING INSTALL $(CONFIG) +FILES = Makefile README.md ACKNOWLEDGMENTS NEWS COPYING INSTALL $(CONFIG) VERSION=$(PROJECT_VERSION) LOCAL_CLEAN= Templates.DB @@ -58,6 +58,9 @@ include $(TOP)/config/common_make_rules +default_voices: + ./src/scripts/default_voices.sh + backup: time-stamp @ $(RM) -f $(TOP)/FileList @ $(MAKE) file-list diff -Nru festival-2.4~release/README festival-2.5.0/README --- festival-2.4~release/README 2014-11-16 17:10:58.000000000 +0000 +++ festival-2.5.0/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,57 +0,0 @@ - - The Festival Speech Synthesis System - version 2.4 December 2014 - -This directory contains the Festival Speech Synthesis System, -developed at CSTR, University of Edinburgh. The project was originally -started by Alan W Black and Paul Taylor but many others have been -involved (see ACKNOWLEDGEMENTS file for full list). - -Festival offers a general framework for building speech synthesis -systems as well as including examples of various modules. As a whole -it offers full text to speech through a number APIs: from shell level, -though a Scheme command interpreter, as a C++ library, and an Emacs -interface. Festival is multi-lingual (currently English (US and UK) -and Spanish are distributed but a host of other voices have been -developed by others) though English is the most advanced. - -The system is written in C++ and uses the Edinburgh Speech Tools -for low level architecture and has a Scheme (SIOD) based command -interpreter for control. Documentation is given in the FSF texinfo -format which can generate, a printed manual, info files and HTML. - -COPYING - -Festival is free. Earlier versions were restricted to non-commercial -use but we have now relaxed those conditions. 
The licence is an X11 -style licence thus it can be incorporated in commercial products -and free source products without restriction. See COPYING for the -actual details. - -INSTALL - -Festival should run on any standard Unix platform. It has already run -on Solaris, SunOS, Linux and FreeBSD. It requires a C++ compiler (GCC -2.7.2, 2.8.1, 2.95.[123], 3.2.3 3.3.2 RedHat "gcc-2.96", gcc 3.3, gcc -4.4.x and gcc-4.5.x are our standard compilers) to install. A port to -Windows XP/NT/95/98 and 2000 using either Cygnus GNUWIN32, this is -still new but many people are successfully using it. - -A detailed description of installation and requirements for the whole -system is given in the file INSTALL read that for details. - -NEWS - -Keep abreast of Festival News by regularly checking the Festival homepage - http://www.cstr.ed.ac.uk/projects/festival/ -or the US site - http://festvox.org/festival/ - -New in Festival 2.2 - updates to hts (hts_engine 1.07) and clustergen - -New in Festival 2.1 - Support for various new GCC compilers - Improved support for hts, clustergen, clunits and multisyn voices - lots of wee bugs fixed - diff -Nru festival-2.4~release/README.md festival-2.5.0/README.md --- festival-2.4~release/README.md 1970-01-01 00:00:00.000000000 +0000 +++ festival-2.5.0/README.md 2017-12-25 15:02:43.000000000 +0000 @@ -0,0 +1,65 @@ + + The Festival Speech Synthesis System + version 2.5 December 2017 + +https://github.com/festvox/festival/ + +This directory contains the Festival Speech Synthesis System, +developed at CSTR, University of Edinburgh. The project was originally +started by Alan W Black and Paul Taylor but many others have been +involved (see ACKNOWLEDGEMENTS file for full list). + +Festival offers a general framework for building speech synthesis +systems as well as including examples of various modules. 
As a whole +it offers full text to speech through a number APIs: from shell level, +though a Scheme command interpreter, as a C++ library, and an Emacs +interface. Festival is multi-lingual (currently English (US and UK) +and Spanish are distributed but a host of other voices have been +developed by others) though English is the most advanced. + +The system is written in C++ and uses the Edinburgh Speech Tools +for low level architecture and has a Scheme (SIOD) based command +interpreter for control. Documentation is given in the FSF texinfo +format which can generate, a printed manual, info files and HTML. + +COPYING + +Festival is free. Earlier versions were restricted to non-commercial +use but we have now relaxed those conditions. The licence is an X11 +style licence thus it can be incorporated in commercial products +and free source products without restriction. See COPYING for the +actual details. + +INSTALL + +Festival should run on any standard Unix platform. It has already run +on Solaris, SunOS, Linux and FreeBSD. It requires a C++ compiler (GCC +2.7.2, 2.8.1, 2.95.[123], 3.2.3 3.3.2 RedHat "gcc-2.96", gcc 3.3, gcc +4.4.x and gcc-4.5.x, gcc-6.2.0 are our standard compilers) to +install. A port to Windows XP/NT/95/98 and 2000 using either Cygnus +GNUWIN32, this is still new but many people are successfully using it, +it works fine with Windows 10 bash + +A detailed description of installation and requirements for the whole +system is given in the file INSTALL read that for details. 
+ +NEWS + +Keep abreast of Festival News by regularly checking the Festival homepage + http://www.cstr.ed.ac.uk/projects/festival/ +or the US site + http://festvox.org/festival/ +otr on github + https://github.com/festvox/festival/ + +New in Festival 2.5 + Support for gcc6 (which is a somewhat different dialect of C++) + +New in Festival 2.2 + updates to hts (hts_engine 1.07) and clustergen + +New in Festival 2.1 + Support for various new GCC compilers + Improved support for hts, clustergen, clunits and multisyn voices + lots of wee bugs fixed + diff -Nru festival-2.4~release/src/arch/festival/features.cc festival-2.5.0/src/arch/festival/features.cc --- festival-2.4~release/src/arch/festival/features.cc 2012-11-16 17:37:04.000000000 +0000 +++ festival-2.5.0/src/arch/festival/features.cc 2017-09-04 15:54:08.000000000 +0000 @@ -231,17 +231,17 @@ const EST_String &Sname = ts.get().string(); const char *name = Sname; if (streq(name,"n")) - s=s->next(); + s=inext(s); else if (streq(name,"p")) - s=s->prev(); + s=iprev(s); else if (streq(name,"nn")) - s=s->next()->next(); + s=inext(inext(s)); else if (streq(name,"pp")) - s=s->prev()->prev(); + s=iprev(iprev(s)); else if (streq(name,"up")) // up down should really be private - s=s->up(); + s=iup(s); else if (streq(name,"down")) - s=s->down(); + s=idown(s); else if (streq(name,"parent")) s=parent(s); else if (streq(name,"parent_to")) @@ -259,9 +259,9 @@ else if (streq(name,"daughtern")) s=daughtern(s); else if (streq(name,"last")) - s=s->last(); + s=last(s); else if (streq(name,"first")) - s=s->first(); + s=first(s); else if (strncmp(name,"R:",2) == 0) // new relation structure s = s->as_relation(&name[2]); else if (s->f_present(Sname)) diff -Nru festival-2.4~release/src/arch/festival/festival.cc festival-2.5.0/src/arch/festival/festival.cc --- festival-2.4~release/src/arch/festival/festival.cc 2014-12-20 15:38:20.000000000 +0000 +++ festival-2.5.0/src/arch/festival/festival.cc 2017-09-04 15:54:08.000000000 +0000 @@ -160,7 
+160,7 @@ EST_String b; b = EST_String("(load ")+quote_string(fname,"\"","\\",1)+")"; // I used to do the above without the b intermediate variable - // but that caused a crash for some compilers on some machines + // but that caused a crash for some compilers on some machines return festival_eval_command(b); } @@ -182,9 +182,10 @@ } else { - EST_String ll = command; // copy it; - l = read_from_string((char *)ll); - leval(l,NIL); + char *w = wstrdup((const char *)command); // copy it; + l = read_from_string(w); + leval(l,NIL); + wfree(w); rvalue = TRUE; } diff -Nru festival-2.4~release/src/arch/festival/utterance.cc festival-2.5.0/src/arch/festival/utterance.cc --- festival-2.4~release/src/arch/festival/utterance.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/arch/festival/utterance.cc 2017-09-04 15:54:08.000000000 +0000 @@ -296,8 +296,8 @@ { LISP desc = cons(strintern(s->name()), cons(item_features(s, false),NIL)); - return cons(cons(desc,stream_tree_to_lisp(s->down())), - stream_tree_to_lisp(s->next())); + return cons(cons(desc,stream_tree_to_lisp(idown(s))), + stream_tree_to_lisp(inext(s))); } } @@ -414,22 +414,22 @@ static LISP item_next(LISP li) { - return (li == NIL) ? NIL : siod(item(li)->next()); + return (li == NIL) ? NIL : siod(inext(item(li))); } static LISP item_prev(LISP li) { - return (li == NIL) ? NIL : siod(item(li)->prev()); + return (li == NIL) ? NIL : siod(iprev(item(li))); } static LISP item_up(LISP li) { - return (li == NIL) ? NIL : siod(item(li)->up()); + return (li == NIL) ? NIL : siod(iup(item(li))); } static LISP item_down(LISP li) { - return (li == NIL) ? NIL : siod(item(li)->down()); + return (li == NIL) ? 
NIL : siod(idown(item(li))); } static LISP item_parent(LISP li) diff -Nru festival-2.4~release/src/arch/festival/viterbi.cc festival-2.5.0/src/arch/festival/viterbi.cc --- festival-2.4~release/src/arch/festival/viterbi.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/arch/festival/viterbi.cc 2017-09-04 15:54:08.000000000 +0000 @@ -90,7 +90,7 @@ } // Map internal ids back to strings - for (EST_Item *p=u->relation(f.S("Relation"))->head(); p != 0; p=p->next()) + for (EST_Item *p=u->relation(f.S("Relation"))->head(); p != 0; p=inext(p)) if (wfst == 0) p->set(f.S("return_feat"),ngram->get_vocab_word(p->I("gv_id"))); else diff -Nru festival-2.4~release/src/arch/festival/wave.cc festival-2.5.0/src/arch/festival/wave.cc --- festival-2.4~release/src/arch/festival/wave.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/arch/festival/wave.cc 2017-09-04 15:54:08.000000000 +0000 @@ -418,17 +418,17 @@ static void utt_save_f0_from_targets(EST_Utterance *u,EST_String &filename) { // Modifications by Gregor Moehler to do proper target tracing (GM) - EST_Item *s; + EST_Item *s; EST_Track f0; float p = 0.0; - float length = u->relation("Segment")->last()->f("end"); + float length = u->relation("Segment")->rlast()->f("end"); int i,frames = (int)(length / 0.010); f0.resize(frames,4); EST_Item *ptval, *tval; - ptval = tval = u->relation("Target")->first_leaf(); - for (i=0,s=u->relation("Segment")->first(); s != 0; s=s->next()) + ptval = tval = first_leaf(u->relation("Target")->first()); + for (i=0,s=u->relation("Segment")->first(); s != 0; s=inext(s)) { if (i >= frames) break; // may hit here one before end diff -Nru festival-2.4~release/src/modules/base/ff.cc festival-2.5.0/src/modules/base/ff.cc --- festival-2.4~release/src/modules/base/ff.cc 2014-12-11 15:15:21.000000000 +0000 +++ festival-2.5.0/src/modules/base/ff.cc 2017-09-04 15:54:08.000000000 +0000 @@ -67,10 +67,10 @@ << endl; festival_error(); } - if (n->prev() == 0) + if (iprev(n) == 0) return 
EST_Val(s->F("end", 0)); else - return EST_Val(s->F("end", 0)-(n->prev()->F("end",0))); + return EST_Val(s->F("end", 0)-(iprev(n)->F("end",0))); } static EST_Val ff_syllable_duration(EST_Item *s) @@ -85,11 +85,11 @@ else { EST_Item *fd = daughter1(n); - EST_Item *ld = fd->last(); + EST_Item *ld = last(fd); if (ld == 0) return val_int0; - EST_Item *ps = as(fd,"Segment")->prev(); + EST_Item *ps = iprev(as(fd,"Segment")); if (ps == 0) return ld->F("end",0); else @@ -119,7 +119,7 @@ if (ld == 0) return val_int0; - EST_Item *ps = as(fd,"Segment")->prev(); + EST_Item *ps = iprev(as(fd,"Segment")); if (ps == 0) return ld->F("end",0); else @@ -135,10 +135,10 @@ static EST_Val ff_seg_start(EST_Item *s) { EST_Item *n = as(s,"Segment"); - if (n->prev() == 0) + if (iprev(n) == 0) return default_val_float; else - return n->prev()->F("end",0); + return iprev(n)->F("end",0); } static EST_Val ff_syl_start(EST_Item *s) @@ -194,14 +194,14 @@ if (nn == 0) // its not really a syllable return EST_Val("single"); - else if (nn->next() == 0) + else if (inext(nn) == 0) { - if (nn->prev() == 0) + if (iprev(nn) == 0) return EST_Val("single"); else return EST_Val("final"); } - else if (nn->prev() == 0) + else if (iprev(nn) == 0) return EST_Val("initial"); else return EST_Val("mid"); @@ -215,7 +215,7 @@ static EST_Val val3 = EST_Val(3); static EST_Val val2 = EST_Val(2); - if ((nn == 0) || (nn->next() != 0)) + if ((nn == 0) || (inext(nn) != 0)) return val_int1; else { @@ -244,7 +244,7 @@ if (nn == 0) return val_int1; // no sylstructure so maybe its standalone - else if (nn->next() != 0) // word internal + else if (inext(nn) != 0) // word internal return val_int0; else if (parent(nn) == 0) // not in a word -- strange return val_int1; @@ -261,7 +261,7 @@ if (nn == 0) return val_int1; // no sylstructure so maybe its standalone - else if (nn->next() != 0) // word internal + else if (inext(nn) != 0) // word internal return val_int0; else if (parent(nn) == 0) // not in a word -- strange return 
val_int1; @@ -292,7 +292,7 @@ EST_Item *nn = as(s,"Intonation"); EST_Item *p; - for (p=daughter1(nn); p; p=p->next()) + for (p=daughter1(nn); p; p=inext(p)) if (p->name().contains("*")) return EST_Val(p->name()); return EST_Val("NONE"); @@ -304,7 +304,7 @@ EST_Item *nn = as(s,"Intonation"); EST_Item *p; - for (p=daughter1(nn); p; p=p->next()) + for (p=daughter1(nn); p; p=inext(p)) { EST_String l = p->name(); if ((l.contains("%")) || (l.contains("-"))) @@ -350,7 +350,7 @@ EST_Item *p; int size; - for (p=daughter1(nn),size=0; p; p=p->next(),size++) + for (p=daughter1(nn),size=0; p; p=inext(p),size++) if (ph_is_vowel(p->name())) return EST_Val(size); @@ -365,7 +365,7 @@ EST_Item *p; int size; - for (p=daughter1(nn),size=0; p; p=p->next(),size++) + for (p=daughter1(nn),size=0; p; p=inext(p),size++) if (ph_is_vowel(p->name())) return EST_Val(p->name()); @@ -380,7 +380,7 @@ EST_Item *p; int size; - for (p=daughter1(nn)->last(),size=1; p; p=p->prev(),size++) + for (p=daughtern(nn),size=1; p; p=iprev(p),size++) if (ph_is_vowel(p->name())) return EST_Val(size); @@ -396,11 +396,11 @@ if (daughter1(nn) == 0) return val_int0; // no segments in syllable - else if ((ps = as(daughter1(nn),"Segment")->prev()) != 0) + else if ((ps = iprev(as(daughter1(nn),"Segment"))) != 0) start = ps->F("end",0); unvox = start; - for (p=daughter1(nn); p != 0; p=p->next()) + for (p=daughter1(nn); p != 0; p=inext(p)) { if ((ph_is_vowel(p->name())) || (ph_is_voiced(p->name()))) @@ -418,7 +418,7 @@ EST_Item *nn = as(s,"SylStructure"); EST_Item *p; - for (p=daughter1(nn); p != 0; p=p->next()) + for (p=daughter1(nn); p != 0; p=inext(p)) { if (ph_is_vowel(p->name())) return EST_Val(ff_seg_start(p)); @@ -433,7 +433,7 @@ EST_Item *nn = as(s,"SylStructure"); EST_Item *p; - for (p=nn->next(); p; p=p->next()) + for (p=inext(nn); p; p=inext(p)) if (ph_is_vowel(p->name())) return EST_Val("onset"); return EST_Val("coda"); @@ -442,9 +442,9 @@ static EST_Val ff_seg_onset_stop(EST_Item *s) { // 1 if onset of the 
syllable attached to this segment has a stop - EST_Item *nn = as(s,"SylStructure")->first(); + EST_Item *nn = first(as(s,"SylStructure")); - for ( ; nn ; nn=nn->next()) + for ( ; nn ; nn=inext(nn)) { if (ph_is_vowel(nn->name())) return val_string0; @@ -457,9 +457,9 @@ static EST_Val ff_seg_coda_fric(EST_Item *s) { // 1 if coda of the syllable attached to this segment has a fricative - EST_Item *nn = as(s,"SylStructure")->last(); + EST_Item *nn = last(as(s,"SylStructure")); - for ( ; nn ; nn=nn->prev()) + for ( ; nn ; nn=iprev(nn)) { if (ph_is_vowel(nn->name())) return val_string0; @@ -476,7 +476,7 @@ EST_Item *p; int pos=0; - for (p=nn->first(); p; p=p->next(),pos++) + for (p=first(nn); p; p=inext(p),pos++) if (p == nn) return EST_Val(pos); // don't think you can get here @@ -488,7 +488,7 @@ // 1 if seg is syllable initial, 0 otherwise. EST_Item *nn = as(s,"SylStructure"); - if (nn->prev() == 0) + if (iprev(nn) == 0) return val_string1; else return val_string0; @@ -499,7 +499,7 @@ // 1 if seg is syllable initial, 0 otherwise. 
EST_Item *nn = as(s,"SylStructure"); - if (nn->next() == 0) + if (inext(nn) == 0) return val_string1; else return val_string0; @@ -512,7 +512,7 @@ EST_Item *p; int pos=0; - for (p=nn->first(); p; p=p->next(),pos++) + for (p=first(nn); p; p=inext(p),pos++) if (p == nn) return EST_Val(pos); // don't think you can get here @@ -526,7 +526,7 @@ EST_Item *p; int pos=0; - for (p=nn->first(); p; p=p->next(),pos++) + for (p=first(nn); p; p=inext(p),pos++) if (p == nn) return EST_Val(pos); // don't think you can get here @@ -539,9 +539,9 @@ // a new number group EST_Item *nn = as(s,"Token"); - if ((nn->next() == 0) && + if ((inext(nn) == 0) && (parent(nn)->name().matches(RXdouble)) && - (parent(nn)->next()->name().matches(RXdouble))) + (inext(parent(nn))->name().matches(RXdouble))) return val_string1; else return val_string0; @@ -558,7 +558,7 @@ EST_Item *nn = as(s,"SylStructure"); EST_Item *p; - for (p=daughter1(nn); p; p = p->next()) + for (p=daughter1(nn); p; p = inext(p)) { if (ph_is_vowel(p->name())) return ffeature(p,"R:Target.daughter1.f0"); @@ -607,7 +607,7 @@ float smid = ff_seg_mid(s); EST_Utterance *u = get_utt(s); - for (lastt=t=u->relation("Target")->first_leaf(); + for (lastt=t=first_leaf(u->relation("Target")->first()); next_leaf(t) != 0; t=next_leaf(t)) { if (smid <= t->F("pos",0)) @@ -638,12 +638,12 @@ EST_Item *nn = as(s,"Syllable"); // The first syllable in the phrase EST_Item *fsyl = - as(daughter1(as(parent(s,"SylStructure"),"Phrase")->first(),"SylStructure"), + as(daughter1(first(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; - for (count=0,p=nn; p != 0; p=p->prev(),count++) + for (count=0,p=nn; p != 0; p=iprev(p),count++) if (p == fsyl) return EST_Val(count); return EST_Val(count); @@ -655,12 +655,12 @@ EST_Item *nn = as(s,"Syllable"); // The last syllable in the phrase EST_Item *lsyl = - as(daughtern(as(parent(s,"SylStructure"),"Phrase")->last(),"SylStructure"), + 
as(daughtern(last(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; - for (count=0,p=nn; p != 0; p=p->next(),count++) + for (count=0,p=nn; p != 0; p=inext(p),count++) if (p == lsyl) return EST_Val(count); return EST_Val(count); @@ -671,13 +671,13 @@ // Number of stressed syllables since last phrase break EST_Item *nn = as(s,"Syllable"); EST_Item *fsyl = - as(daughter1(as(parent(s,"SylStructure"),"Phrase")->first(),"SylStructure"), + as(daughter1(first(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; if (nn == fsyl) return val_int0; - for (count=0,p=nn->prev(); (p != 0) && (p != fsyl); p = p->prev()) + for (count=0,p=iprev(nn); (p != 0) && (p != fsyl); p = iprev(p)) if (p->F(stressname,0) == 1) count ++; return EST_Val(count); @@ -692,13 +692,13 @@ // Number of stressed syllables since last phrase break EST_Item *nn = as(s,"Syllable"); EST_Item *fsyl = - as(daughter1(as(parent(s,"SylStructure"),"Phrase")->first(),"SylStructure"), + as(daughter1(first(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; if (nn == fsyl) return val_int0; - for (count=0,p=nn->prev(); (p != 0); p = p->prev()) + for (count=0,p=iprev(nn); (p != 0); p = iprev(p)) { if (p->F(stressname,0) == 1) count ++; @@ -713,13 +713,13 @@ EST_Item *nn = as(s,"Syllable"); // The last syllable in the phrase EST_Item *lsyl = - as(daughtern(as(parent(s,"SylStructure"),"Phrase")->last(),"SylStructure"), + as(daughtern(last(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; if (nn == lsyl) return val_int0; - for (count=0,p=nn->next(); (p != 0); p=p->next()) + for (count=0,p=inext(nn); (p != 0); p=inext(p)) { if (p->F(stressname,0) == 1) count ++; @@ -734,13 +734,13 @@ EST_Item *nn = as(s,"Syllable"); // The first syllable in the phrase EST_Item *fsyl = - as(daughter1(as(parent(s,"SylStructure"),"Phrase")->first(),"SylStructure"), + 
as(daughter1(first(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; if (nn == fsyl) return val_int0; - for (count=0,p=nn->prev(); (p != 0) && (p != fsyl); p = p->prev()) + for (count=0,p=iprev(nn); (p != 0) && (p != fsyl); p = iprev(p)) if (ff_syl_accented(p) == 1) count ++; return EST_Val(count); @@ -756,13 +756,13 @@ EST_Item *nn = as(s,"Syllable"); // The first syllable in the phrase EST_Item *fsyl = - as(daughter1(as(parent(s,"SylStructure"),"Phrase")->first(),"SylStructure"), + as(daughter1(first(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; if (nn == fsyl) return val_int0; - for (count=0,p=nn->prev(); (p != 0); p = p->prev()) + for (count=0,p=iprev(nn); (p != 0); p = iprev(p)) { if (ff_syl_accented(p) == 1) count ++; @@ -777,13 +777,13 @@ EST_Item *nn = as(s,"Syllable"); // The last syllable in the phrase EST_Item *lsyl = - as(daughtern(as(parent(s,"SylStructure"),"Phrase")->last(),"SylStructure"), + as(daughtern(last(as(parent(s,"SylStructure"),"Phrase")),"SylStructure"), "Syllable"); EST_Item *p; int count; if (nn == lsyl) return val_int0; - for (count=0,p=nn->next(); (p != 0); p=p->next()) + for (count=0,p=inext(nn); (p != 0); p=inext(p)) { if (ff_syl_accented(p) == 1) count ++; @@ -799,7 +799,7 @@ EST_Item *p; int count; - for (count=0,p=nn->prev(); p != 0; p=p->prev(),count++) + for (count=0,p=iprev(nn); p != 0; p=iprev(p),count++) if (ff_syl_accented(p) == 1) return EST_Val(count); @@ -813,7 +813,7 @@ EST_Item *p; int count; - for (count=0,p=nn->next(); p != 0; p=p->next(),count++) + for (count=0,p=inext(nn); p != 0; p=inext(p),count++) if (ff_syl_accented(p) == 1) return EST_Val(count); @@ -827,7 +827,7 @@ EST_Item *p; int count; - for (count=0,p=nn->prev(); p != 0; p=p->prev()) + for (count=0,p=iprev(nn); p != 0; p=iprev(p)) { if (p->name() == "BB") return EST_Val(count); diff -Nru festival-2.4~release/src/modules/base/phrasify.cc 
festival-2.5.0/src/modules/base/phrasify.cc --- festival-2.4~release/src/modules/base/phrasify.cc 2014-12-11 15:24:37.000000000 +0000 +++ festival-2.5.0/src/modules/base/phrasify.cc 2017-09-04 15:54:08.000000000 +0000 @@ -112,12 +112,12 @@ u->create_relation("Phrase"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { if (phr == 0) phr = add_phrase(u); append_daughter(phr,"Phrase",w); - if (w->next() == 0) + if (inext(w) == 0) { w->set("pbreak","B"); phr->set_name("4"); @@ -136,7 +136,7 @@ u->create_relation("Phrase"); tree = siod_get_lval("phrase_cart_tree","no phrase cart tree"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { if (phr == 0) phr = add_phrase(u); @@ -225,7 +225,7 @@ pbyp_get_params(siod_get_lval("phr_break_params",NULL)); gc_protect(&bb_tags); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { // Set up tag index for pos ngram EST_String lpos = map_pos(pos_map,w->f("pos").string()); w->set("phr_pos",lpos); @@ -244,7 +244,7 @@ // Given predicted break, go through and add phrases u->create_relation("Phrase"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { w->set("pbreak",bb_ngram-> get_vocab_word(w->f("pbreak_index").Int())); @@ -306,7 +306,7 @@ // Given predicted break, go through and add phrases u->create_relation("Phrase"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { w->set("pbreak",bb_ngram-> get_vocab_word(w->f("pbreak_index").Int())); @@ -351,7 +351,7 @@ //*cdebug << get_c_string(answer) <next() == 0) // end of utterances so force a break + if (inext(s) == 0) // end of utterances so force a break { EST_VTCandidate *c = new EST_VTCandidate; c->s = s; @@ -415,12 
+415,12 @@ if (bb_pos_ngram->order() == 4) { window[1] = s->I("pos_index",0); - if (s->prev() != 0) - window[0] = s->prev()->I("pos_index",0); + if (iprev(s) != 0) + window[0] = iprev(s)->I("pos_index",0); else window[0] = pos_p_start_tag; - if (s->next() != 0) - window[2] = s->next()->I("pos_index",0); + if (inext(s) != 0) + window[2] = inext(s)->I("pos_index",0); else window[2] = pos_n_start_tag; *cdebug << window[0] << " " << window[1] << " " << window[2] << " " ; @@ -428,27 +428,27 @@ else if (bb_pos_ngram->order() == 3) { window[0] = s->I("pos_index",0); - if (s->next() != 0) - window[1] = s->next()->I("pos_index",0); + if (inext(s) != 0) + window[1] = inext(s)->I("pos_index",0); else window[1] = pos_n_start_tag; } else if (bb_pos_ngram->order() == 5) { // This is specific for some set of pos tagsets window[2] = s->I("pos_index",0); - if (s->prev() != 0) + if (iprev(s) != 0) { - window[1] = s->prev()->I("pos_index",0); + window[1] = iprev(s)->I("pos_index",0); } else { window[1] = pos_p_start_tag; } - if (s->next() != 0) + if (inext(s) != 0) { - window[3] = s->next()->I("pos_index",0); - if (s->next()->next() != 0) - window[0] = s->next()->next()->I("pos_index",0); + window[3] = inext(s)->I("pos_index",0); + if (inext(inext(s)) != 0) + window[0] = inext(inext(s))->I("pos_index",0); else window[0] = 0; } @@ -478,7 +478,7 @@ c->next = all_c; all_c = c; // but then if you give only one option ... 
} - else if (s->next() == 0) // end of utterances so force a break + else if (inext(s) == 0) // end of utterances so force a break { EST_VTCandidate *c = new EST_VTCandidate; c->s = s; @@ -546,7 +546,7 @@ // If this word came from inside a token reduce the // probability of a break if ((ffeature(s,"R:Token.n.name") != "0") && - ((s->as_relation("Token")->first()->length()) < 7)) + ((first(s->as_relation("Token"))->length()) < 7)) { float weight = ffeature(s,"pbreak_scale"); if (weight == 0) weight = 0.5; @@ -730,7 +730,7 @@ } - if (d && d->c && d->c->s && (d->c->s->next()->next()) == NULL) + if (d && d->c && d->c->s && (inext(inext(d->c->s))) == NULL) { /* must be in final state */ printf("must be in final state\n"); if (i != bb_track->num_frames()) @@ -801,7 +801,7 @@ pbyp_get_params(siod_get_lval("phr_break_params",NULL)); gc_protect(&bb_tags); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { // Set up tag index for pos ngram EST_String lpos = map_pos(pos_map,w->f("pos").string()); w->set("phr_pos",lpos); @@ -818,7 +818,7 @@ // Given predicted break, go through and add phrases u->create_relation("Phrase"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { w->set("pbreak",bb_ngram-> get_vocab_word(w->f("pbreak_index").Int())); diff -Nru festival-2.4~release/src/modules/base/pos.cc festival-2.5.0/src/modules/base/pos.cc --- festival-2.4~release/src/modules/base/pos.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/base/pos.cc 2017-09-04 15:54:08.000000000 +0000 @@ -92,7 +92,7 @@ LISP l; // Map pos tagset to desired set LISP pos_map = siod_get_lval("pos_map",NULL); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { // convert pos index into string value pos = pos_ngram->get_vocab_word(w->f("pos_index").Int()); diff -Nru 
festival-2.4~release/src/modules/base/postlex.cc festival-2.5.0/src/modules/base/postlex.cc --- festival-2.4~release/src/modules/base/postlex.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/base/postlex.cc 2017-09-04 15:54:08.000000000 +0000 @@ -74,7 +74,7 @@ for (s=u->relation("Segment")->first(); s != 0; s = t) { - t = s->next(); + t = inext(s); if (wagon_predict(s,r_red_tree) == "delete") s->unref_all(); } @@ -93,7 +93,7 @@ if ((vow_table == NIL) || (red_tree == NIL)) return; // ain't anything to do - for (s=u->relation("Syllable")->first(); s != 0; s = s->next()) + for (s=u->relation("Syllable")->first(); s != 0; s = inext(s)) { if (wagon_predict(s,red_tree) == "1") vowel_reduce(s,vow_table); @@ -107,7 +107,7 @@ EST_Item *seg; LISP vreduce=NIL; - for (seg=daughter1(syl,"SylStructure"); seg; seg=seg->next()) + for (seg=daughter1(syl,"SylStructure"); seg; seg=inext(seg)) { if (ph_is_vowel(seg->name())) { diff -Nru festival-2.4~release/src/modules/base/word.cc festival-2.5.0/src/modules/base/word.cc --- festival-2.4~release/src/modules/base/word.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/base/word.cc 2017-09-04 15:54:08.000000000 +0000 @@ -62,7 +62,7 @@ u->create_relation("Segment"); SylStructure = u->create_relation("SylStructure"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { lpos = NIL; pos = (EST_String)ffeature(w,"hg_pos"); @@ -111,7 +111,7 @@ u->create_relation("Segment"); SylStructure = u->create_relation("SylStructure"); - for (w=u->relation("Word")->first(); w != 0; w = w->next()) + for (w=u->relation("Word")->first(); w != 0; w = inext(w)) { lpos = NIL; pos = EST_String(ffeature(w,"hg_pos")); diff -Nru festival-2.4~release/src/modules/clunits/acost.cc festival-2.5.0/src/modules/clunits/acost.cc --- festival-2.4~release/src/modules/clunits/acost.cc 2010-11-05 14:13:03.000000000 +0000 +++ 
festival-2.5.0/src/modules/clunits/acost.cc 2017-09-04 15:54:08.000000000 +0000 @@ -84,12 +84,12 @@ c_si->set_val("Acoustic_Coeffs", est_val(track)); // Now add subtracks for each segment - for (EST_Item *s=u->relation(segrelation)->first(); s != 0; s=s->next()) + for (EST_Item *s=u->relation(segrelation)->first(); s != 0; s=inext(s)) { EST_Track *st = new EST_Track; float start = ffeature(s,"segment_start"); float end = ffeature(s,"segment_end"); - if (s->prev()) + if (iprev(s)) start -= ac_left_context* ffeature(s,"p.segment_duration").Float(); int startf = track->index(start); diff -Nru festival-2.4~release/src/modules/clunits/cljoin.cc festival-2.5.0/src/modules/clunits/cljoin.cc --- festival-2.4~release/src/modules/clunits/cljoin.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/clunits/cljoin.cc 2017-09-04 15:54:08.000000000 +0000 @@ -118,7 +118,7 @@ n_i = 0; s_i = 0; ltime = 0; - for (u = source_lab.head(); u; u = u->next()) + for (u = source_lab.head(); u; u = inext(u)) { u_frames = u->I("num_frames"); // stime = source_pm.t(s_i+u_frames-1) - source_pm.t(s_i); diff -Nru festival-2.4~release/src/modules/clunits/clunits.cc festival-2.5.0/src/modules/clunits/clunits.cc --- festival-2.4~release/src/modules/clunits/clunits.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/clunits/clunits.cc 2017-09-04 15:54:08.000000000 +0000 @@ -129,7 +129,7 @@ setup_clunits_params(); f = u->relation("Segment")->head(); - for (s=f; s; s=s->next()) + for (s=f; s; s=inext(s)) s->set_val("clunit_name",ffeature(s,clunit_name_feat)); if (f) @@ -164,7 +164,7 @@ cldb = check_cldb(); // make sure there is one loaded units = u->create_relation("Unit"); - for (s=u->relation("Segment")->head(); s != 0; s=s->next()) + for (s=u->relation("Segment")->head(); s != 0; s=inext(s)) { EST_Item *unit = units->append(); CLunit *db_unit = clunit(s->f("unit_id")); @@ -172,12 +172,12 @@ unit->set_name(db_unit->name); unit->set("fileid",db_unit->fileid); // These 
should be modified from the optimal coupling - if ((s->prev()) && (s->f_present("unit_this_move"))) + if ((iprev(s)) && (s->f_present("unit_this_move"))) st = s->F("unit_this_move"); else st = db_unit->start; - if (s->next() && (s->next()->f_present("unit_prev_move"))) - e = s->next()->F("unit_prev_move"); + if (inext(s) && (inext(s)->f_present("unit_prev_move"))) + e = inext(s)->F("unit_prev_move"); else e = db_unit->end; if ((e-st) < 0.011) @@ -201,7 +201,7 @@ // Make it look as much like the diphones as possible for // the rest of the code ss = u->create_relation("SourceSegments"); - for (s = u->relation("Segment")->head(); s != 0 ; s = s->next()) + for (s = u->relation("Segment")->head(); s != 0 ; s = inext(s)) { EST_Item *d = ss->append(); d->set_name(ffeature(s,"clunit_name")); @@ -222,8 +222,8 @@ float t_time = 0.0, end; p_time = 0.0; - for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(), - s = s->next()) + for (s = source_lab.head(), u = diphone_stream.head(); u; + u = inext(u), s = inext(s)) { pm = track(u->f("coefs")); if (pm == 0) @@ -259,7 +259,7 @@ EST_Item *s; int size,i,k,c; - for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) + for (size=0,s=u->relation("Unit")->head(); s != 0; s = inext(s)) size += wave(s->f("sig"))->num_samples(); if (u->relation("Unit")->head()) @@ -269,7 +269,7 @@ } i = w->num_samples(); w->resize(size); // its maximum size - for (s=u->relation("Unit")->head()->next(); s; s=s->next()) + for (s=inext(u->relation("Unit")->head()); s; s=inext(s)) { w1 = wave(s->f("sig")); // Find last zero crossing @@ -308,7 +308,7 @@ int width, lwidth; EST_Wave *www=0; - for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) + for (size=0,s=u->relation("Unit")->head(); s != 0; s = inext(s)) size += wave(s->f("sig"))->num_samples(); if (u->relation("Unit")->head()) @@ -320,7 +320,7 @@ w->resize(size); // its maximum size wi=0; lwidth = width = 0; - for (s=u->relation("Unit")->head(); s; s=s->next()) + 
for (s=u->relation("Unit")->head(); s; s=inext(s)) { w1 = wave(s->f("sig")); t1 = track(s->f("coefs")); @@ -355,14 +355,13 @@ EST_Utterance *u = get_c_utt(utt); EST_Wave *w = new EST_Wave; EST_Wave *w1 = 0; - EST_Track *t1 = 0; EST_Item *witem = 0; EST_Item *s; int size,i,wi; int samp_end, samp_start; EST_Wave *www=0; - for (size=0,s=u->relation("Unit")->head(); s != 0; s = s->next()) + for (size=0,s=u->relation("Unit")->head(); s != 0; s = inext(s)) { samp_end = s->I("samp_end"); samp_start = s->I("samp_start"); @@ -377,7 +376,7 @@ } w->resize(size); // its maximum size wi=0; - for (s=u->relation("Unit")->head(); s; s=s->next()) + for (s=u->relation("Unit")->head(); s; s=inext(s)) { samp_end = s->I("samp_end"); samp_start = s->I("samp_start"); @@ -389,7 +388,6 @@ (float)samp_end/(float)w->sample_rate(), w1->num_samples(), samp_end); */ - t1 = track(s->f("coefs")); for (i=samp_start; ia_no_check(wi) = w1->a_no_check(i); /* printf("%d %f\n",wi,(float)wi/(float)w->sample_rate()); */ @@ -487,7 +485,7 @@ // An experiment, for all candidates of the previous // item whose following is of this phone type, include // them as a candidate - EST_Item *ppp = s->prev(); + EST_Item *ppp = iprev(s); if (ppp) { EST_VTCandidate *lc = vtcand(ppp->f("unit_cands")); diff -Nru festival-2.4~release/src/modules/clustergen/clustergen.cc festival-2.5.0/src/modules/clustergen/clustergen.cc --- festival-2.4~release/src/modules/clustergen/clustergen.cc 2014-11-16 17:16:55.000000000 +0000 +++ festival-2.5.0/src/modules/clustergen/clustergen.cc 2017-09-04 15:54:08.000000000 +0000 @@ -2,7 +2,7 @@ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ -/* Copyright (c) 2005-2010 */ +/* Copyright (c) 2005-2017 */ /* All Rights Reserved. 
*/ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ @@ -53,7 +53,7 @@ void festival_clustergen_init(void) { proclaim_module("clustergen_engine", - "Copyright (C) Carnegie Mellon University 2005-2014\n"); + "Copyright (C) Carnegie Mellon University 2005-2017\n"); init_subr_3("mlsa_resynthesis", mlsa_resynthesis, "(mlsa_resynthesis TRACK STRTRACK FILTERTRACK)\n\ diff -Nru festival-2.4~release/src/modules/clustergen/HTS_vocoder_me.cc festival-2.5.0/src/modules/clustergen/HTS_vocoder_me.cc --- festival-2.4~release/src/modules/clustergen/HTS_vocoder_me.cc 2013-02-18 15:10:50.000000000 +0000 +++ festival-2.5.0/src/modules/clustergen/HTS_vocoder_me.cc 2017-09-04 15:54:08.000000000 +0000 @@ -182,156 +182,156 @@ double beta, double volume, double *rawdata, HTS_Audio * audio) { - double x; - int i, j; - int k; - short xs; - int rawidx = 0; - double p; - HTS_Vocoder *v = v_me->v; /* access to original HTS_Vocoder struct */ - double xpulse; - double xnoise; - double fxpulse; - double fxnoise; - double e1, e2; - - - /* Copy in str's and build pulse and noise shaping filter for this frame */ - for (i = 0; i < v_me->filter_order; i++) - { - v_me->hp[i] = v_me->hn[i] = 0.0; - for (j = 0; j < v_me->num_filters; j++) - { - v_me->hp[i] += strengths[j] * v_me->h[j][i]; - v_me->hn[i] += (1 - strengths[j]) * v_me->h[j][i]; - } - } + double x = 0.0; + int i, j; + int k; + short xs; + int rawidx = 0; + double p; + HTS_Vocoder *v = v_me->v; /* access to original HTS_Vocoder struct */ + double xpulse; + double xnoise; + double fxpulse; + double fxnoise; + double e1, e2; - /* lf0 -> pitch */ - if (lf0 == LZERO) - p = 0.0; - else - p = v->rate / exp(lf0); - - /* first time */ - if (v->p1 < 0.0) { - HTS_Vocoder_initialize_excitation(v, 0); - if (v->stage == 0) { /* for MCP */ - HTS_mc2b(spectrum, v->c, m, alpha); - } else { /* for LSP */ - if (v->use_log_gain) - v->c[0] = LZERO; - else - v->c[0] = ZERO; - for (i = 1; i <= m; i++) - v->c[i] = i * PI / (m + 1); - 
HTS_lsp2mgc(v, v->c, v->c, m, alpha); - HTS_mc2b(v->c, v->c, m, alpha); - HTS_gnorm(v->c, v->c, m, v->gamma); - for (i = 1; i <= m; i++) - v->c[i] *= v->gamma; - } - } - - HTS_Vocoder_start_excitation(v, p, 0); - if (v->stage == 0) { /* for MCP */ - HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta); - HTS_mc2b(spectrum, v->cc, m, alpha); - for (i = 0; i <= m; i++) - v->cinc[i] = (v->cc[i] - v->c[i]) * IPERIOD / v->fprd; - } else { /* for LSP */ - HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta); - HTS_check_lsp_stability(spectrum, m); - HTS_lsp2mgc(v, spectrum, v->cc, m, alpha); - HTS_mc2b(v->cc, v->cc, m, alpha); - HTS_gnorm(v->cc, v->cc, m, v->gamma); - for (i = 1; i <= m; i++) - v->cc[i] *= v->gamma; - for (i = 0; i <= m; i++) - v->cinc[i] = (v->cc[i] - v->c[i]) * IPERIOD / v->fprd; - } - - - for (j = 0, i = (IPERIOD + 1) / 2; j < v->fprd; j++) - { - if (v->stage == 0) { /* for MCP */ - if (v->p1 == 0.0) - { - x = HTS_white_noise(v); - - /* MIXED EXCITATION */ - xnoise = x; - xpulse = 0.0; - } - else - { - if ((v->pc += 1.0) >= v->p1) + + /* Copy in str's and build pulse and noise shaping filter for this frame */ + for (i = 0; i < v_me->filter_order; i++) + { + v_me->hp[i] = v_me->hn[i] = 0.0; + for (j = 0; j < v_me->num_filters; j++) { - x = sqrt(v->p1); - v->pc = v->pc - v->p1; + v_me->hp[i] += strengths[j] * v_me->h[j][i]; + v_me->hn[i] += (1 - strengths[j]) * v_me->h[j][i]; } - else - { - x = 0.0; + } + + /* lf0 -> pitch */ + if (lf0 == LZERO) + p = 0.0; + else + p = v->rate / exp(lf0); + + /* first time */ + if (v->p1 < 0.0) { + HTS_Vocoder_initialize_excitation(v, 0); + if (v->stage == 0) { /* for MCP */ + HTS_mc2b(spectrum, v->c, m, alpha); + } else { /* for LSP */ + if (v->use_log_gain) + v->c[0] = LZERO; + else + v->c[0] = ZERO; + for (i = 1; i <= m; i++) + v->c[i] = i * PI / (m + 1); + HTS_lsp2mgc(v, v->c, v->c, m, alpha); + HTS_mc2b(v->c, v->c, m, alpha); + HTS_gnorm(v->c, v->c, m, v->gamma); + for (i = 1; i <= m; i++) + v->c[i] *= 
v->gamma; } + } - /* MIXED EXCITATION */ - xpulse = x; - xnoise = HTS_mseq(v); /* ABY: plus or minus 1 */ - } - - /* MIXED EXCITATION */ - /* The real work -- apply shaping filters to pulse and noise */ - fxpulse = fxnoise = 0.0; - for (k = v_me->filter_order - 1; k > 0; k--) - { - fxpulse += v_me->hp[k] * v_me->xp_sig[k]; - fxnoise += v_me->hn[k] * v_me->xn_sig[k]; - - v_me->xp_sig[k] = v_me->xp_sig[k-1]; - v_me->xn_sig[k] = v_me->xn_sig[k-1]; - } - - fxpulse += v_me->hp[0] * xpulse; - fxnoise += v_me->hn[0] * xnoise; - v_me->xp_sig[0] = xpulse; - v_me->xn_sig[0] = xnoise; - - x = fxpulse + fxnoise; /* excitation is pulse plus noise */ - - x *= exp(v->c[0]); - x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1); - - } else { /* for LSP */ - if (!NGAIN) - x *= v->c[0]; - x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1); + HTS_Vocoder_start_excitation(v, p, 0); + if (v->stage == 0) { /* for MCP */ + HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta); + HTS_mc2b(spectrum, v->cc, m, alpha); + for (i = 0; i <= m; i++) + v->cinc[i] = (v->cc[i] - v->c[i]) * IPERIOD / v->fprd; + } else { /* for LSP */ + HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta); + HTS_check_lsp_stability(spectrum, m); + HTS_lsp2mgc(v, spectrum, v->cc, m, alpha); + HTS_mc2b(v->cc, v->cc, m, alpha); + HTS_gnorm(v->cc, v->cc, m, v->gamma); + for (i = 1; i <= m; i++) + v->cc[i] *= v->gamma; + for (i = 0; i <= m; i++) + v->cinc[i] = (v->cc[i] - v->c[i]) * IPERIOD / v->fprd; } - x *= volume; - /* output */ - if (rawdata) - rawdata[rawidx++] = x; - if (audio) { - if (x > 32767.0) - xs = 32767; - else if (x < -32768.0) - xs = -32768; - else - xs = (short) x; - HTS_Audio_write(audio, xs); - } - - if (!--i) { - for (i = 0; i <= m; i++) - v->c[i] += v->cinc[i]; - i = IPERIOD; - } - } + for (j = 0, i = (IPERIOD + 1) / 2; j < v->fprd; j++) + { + if (v->stage == 0) { /* for MCP */ + if (v->p1 == 0.0) + { + x = HTS_white_noise(v); + + /* MIXED EXCITATION */ + xnoise = x; + xpulse = 0.0; + } + else 
+ { + if ((v->pc += 1.0) >= v->p1) + { + x = sqrt(v->p1); + v->pc = v->pc - v->p1; + } + else + { + x = 0.0; + } + + /* MIXED EXCITATION */ + xpulse = x; + xnoise = HTS_mseq(v); /* ABY: plus or minus 1 */ + } + + /* MIXED EXCITATION */ + /* The real work -- apply shaping filters to pulse and noise */ + fxpulse = fxnoise = 0.0; + for (k = v_me->filter_order - 1; k > 0; k--) + { + fxpulse += v_me->hp[k] * v_me->xp_sig[k]; + fxnoise += v_me->hn[k] * v_me->xn_sig[k]; + + v_me->xp_sig[k] = v_me->xp_sig[k-1]; + v_me->xn_sig[k] = v_me->xn_sig[k-1]; + } + + fxpulse += v_me->hp[0] * xpulse; + fxnoise += v_me->hn[0] * xnoise; + v_me->xp_sig[0] = xpulse; + v_me->xn_sig[0] = xnoise; + + x = fxpulse + fxnoise; /* excitation is pulse plus noise */ + + x *= exp(v->c[0]); + x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1); + + } else { /* for LSP */ + if (!NGAIN) + x *= v->c[0]; + x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1); + } + + x *= volume; + + /* output */ + if (rawdata) + rawdata[rawidx++] = x; + if (audio) { + if (x > 32767.0) + xs = 32767; + else if (x < -32768.0) + xs = -32768; + else + xs = (short) x; + HTS_Audio_write(audio, xs); + } + + if (!--i) { + for (i = 0; i <= m; i++) + v->c[i] += v->cinc[i]; + i = IPERIOD; + } + } - HTS_Vocoder_end_excitation(v, nlpf); - HTS_movem(v->cc, v->c, m + 1); + HTS_Vocoder_end_excitation(v, nlpf); + HTS_movem(v->cc, v->c, m + 1); } /* HTS_Vocoder_clear_me: clear vocoder (mixed excitation) */ diff -Nru festival-2.4~release/src/modules/clustergen/mlsa_resynthesis.cc festival-2.5.0/src/modules/clustergen/mlsa_resynthesis.cc --- festival-2.4~release/src/modules/clustergen/mlsa_resynthesis.cc 2013-04-15 11:29:30.000000000 +0000 +++ festival-2.5.0/src/modules/clustergen/mlsa_resynthesis.cc 2017-09-04 15:54:08.000000000 +0000 @@ -173,9 +173,6 @@ double volume = 1.0; // Mixed Excitation Stuff - LISP filters; - LISP f; - int fl; int i, j; int me_num_filters = 0; int me_filter_order = 0; diff -Nru 
festival-2.4~release/src/modules/clustergen/simple_mlpg.cc festival-2.5.0/src/modules/clustergen/simple_mlpg.cc --- festival-2.4~release/src/modules/clustergen/simple_mlpg.cc 2010-02-02 17:35:51.000000000 +0000 +++ festival-2.5.0/src/modules/clustergen/simple_mlpg.cc 2017-11-22 15:06:56.000000000 +0000 @@ -77,6 +77,10 @@ /* */ /* Modified as a single file for inclusion in festival/flite */ /* May 2008 awb@cs.cmu.edu */ +/* */ +/* Modified again to make it work with cases where the covariance */ +/* is too small. */ +/* March 2016 pmuthuku@cs.cmu.edu */ /*-------------------------------------------------------------------*/ /* */ /* ML-Based Parameter Generation */ @@ -756,27 +760,53 @@ { long dim, clsnum; long i, j; + long num_err_frames = 0; double det; DVECTOR detvec = NODATA; + XBOOL zero_determinant_flag; + + // In cases where the determinant of the matrix ends up being + // zero, we can fix this by artificially setting the covariance + // to be a magic number. I chose this magic number by computing + // the mean of all MCEP SDs of all leaves in a standard RMS + // voice. 
- Prasanna + //double magic_covariance = 0.0962*0.0962; + double magic_covariance = 0.0962; clsnum = covmat->row; dim = covmat->col; // memory allocation detvec = xdvalloc(clsnum); for (i = 0; i < clsnum; i++) { + + zero_determinant_flag = XFALSE; + for (j = 0, det = 1.0; j < dim; j++) { det *= covmat->data[i][j]; if (det > 0.0) { - covmat->data[i][j] = 1.0 / covmat->data[i][j]; + covmat->data[i][j] = 1.0 / covmat->data[i][j]; } else { - cst_errmsg("error:(class %ld) determinant <= 0, det = %f\n", i, det); - xdvfree(detvec); - return NODATA; + zero_determinant_flag = XTRUE; + covmat->data[i][j] = 1.0 / magic_covariance; + det = pow(magic_covariance, (int)j+1); + // should actually be magic_covariance^2 } } + + if (zero_determinant_flag == XTRUE){ + num_err_frames++; + //printf("Using Prasanna's magic numbers in frame number %d", i); + } + detvec->data[i] = det; } + if (num_err_frames != 0) + { + printf("Warning: det <= 0. Using Prasanna's magic numbers in %d of %d frames\n", + (int)num_err_frames, (int)clsnum); + } + return detvec; } diff -Nru festival-2.4~release/src/modules/diphone/diphone.cc festival-2.5.0/src/modules/diphone/diphone.cc --- festival-2.4~release/src/modules/diphone/diphone.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/diphone/diphone.cc 2017-09-04 15:54:08.000000000 +0000 @@ -413,7 +413,7 @@ for (s=u.relation("Segment")->first(); s != 0; s=ns) { - ns = s->next(); + ns = inext(s); /* successor of the current item s; ns is not yet assigned on the first pass */ if ((ns != 0) && (ph_is_silence(s->name())) && (s->name() == ns->name())) // same *type* of silence @@ -598,7 +598,7 @@ ps->t_sz++; } seg_end = 0; - for (i=0,s=seg->first(); s != 0; s=s->next(),i++) + for (i=0,s=seg->first(); s != 0; s=inext(s),i++) { seg_start = seg_end; seg_end = s->F("end"); @@ -620,7 +620,7 @@ ps->cum_dur[i] += ps->duration[i]; for (rt = daughter1(s,"Target"); rt != 0; - rt = rt->next(),ps->t_sz++) + rt = inext(rt),ps->t_sz++) { ps->targ_phon[ps->t_sz] = i; ps->targ_freq[ps->t_sz] = rt->I("f0"); diff -Nru 
festival-2.4~release/src/modules/donovan/donovan.cc festival-2.5.0/src/modules/donovan/donovan.cc --- festival-2.4~release/src/modules/donovan/donovan.cc 2010-11-05 14:13:03.000000000 +0000 +++ festival-2.5.0/src/modules/donovan/donovan.cc 2017-09-04 15:54:08.000000000 +0000 @@ -196,7 +196,7 @@ ps->targ_freq = walloc(int,ps->t_max); ps->abs_targ = walloc(int,ps->t_max); - for (j=i=0,s=seg->first(); s != 0; s=s->next(),i++) + for (j=i=0,s=seg->first(); s != 0; s=inext(s),i++) { if (((cps=ft_get_param("PhoneSet")) == NIL) || ((streq(get_c_string(cps),"holmes")))) @@ -214,7 +214,7 @@ ps->cum_dur[i] += ps->duration[i]; for (rt = daughter1(s,"Target"); rt != 0; - rt = rt->next(),j++) + rt = inext(rt),j++) { ps->targ_phon[j] = i; ps->targ_freq[j] = rt->I("f0"); diff -Nru festival-2.4~release/src/modules/Duration/duration.cc festival-2.5.0/src/modules/Duration/duration.cc --- festival-2.4~release/src/modules/Duration/duration.cc 2010-11-05 15:21:36.000000000 +0000 +++ festival-2.5.0/src/modules/Duration/duration.cc 2017-09-04 15:54:08.000000000 +0000 @@ -54,7 +54,7 @@ ph_durs = siod_get_lval("phoneme_durations","no phoneme durations"); - for (s=u->relation("Segment")->first(); s != 0; s = s->next()) + for (s=u->relation("Segment")->first(); s != 0; s = inext(s)) { ldur = siod_assoc_str(s->name(),ph_durs); stretch = dur_get_stretch_at_seg(s); @@ -83,7 +83,7 @@ *cdebug << "Duration Default module\n"; - for (s=u->relation("Segment")->first(); s != 0; s = s->next()) + for (s=u->relation("Segment")->first(); s != 0; s = inext(s)) { stretch = dur_get_stretch_at_seg(s); end += 0.100*stretch; @@ -106,7 +106,7 @@ tree = siod_get_lval("duration_cart_tree","no duration cart tree"); - for (s=u->relation("Segment")->first(); s != 0; s = s->next()) + for (s=u->relation("Segment")->first(); s != 0; s = inext(s)) { pdur = wagon_predict(s,tree); stretch = dur_get_stretch_at_seg(s); @@ -144,7 +144,7 @@ tree = siod_get_lval("duration_cart_tree","no duration cart tree"); dur_info = 
siod_get_lval("duration_ph_info","no duration phone info"); - for (s=u->relation("Segment")->first(); s != 0; s = s->next()) + for (s=u->relation("Segment")->first(); s != 0; s = inext(s)) { pdur = wagon_predict(s,tree); ph_info = siod_assoc_str(s->name(),dur_info); diff -Nru festival-2.4~release/src/modules/Duration/Klatt.cc festival-2.5.0/src/modules/Duration/Klatt.cc --- festival-2.4~release/src/modules/Duration/Klatt.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Duration/Klatt.cc 2017-09-04 15:54:08.000000000 +0000 @@ -90,7 +90,7 @@ klatt_params = siod_get_lval("duration_klatt_params", "no klatt duration params"); - for (s=u->relation("Segment")->first(); s != 0; s = s->next()) + for (s=u->relation("Segment")->first(); s != 0; s = inext(s)) klatt_seg_dur(s); return utt; @@ -158,7 +158,7 @@ // True if this segment is the last in a word EST_Item *nn = seg->as_relation("SylStructure"); - if (nn->next() || (parent(nn)->next())) + if (inext(nn) || (inext(parent(nn)))) return FALSE; else return TRUE; @@ -169,7 +169,7 @@ // True if this segment is the last in a syllable EST_Item *nn = seg->as_relation("SylStructure"); - if (nn->next()) + if (inext(nn)) return FALSE; else return TRUE; @@ -180,7 +180,7 @@ // True if this segment is the first in a word EST_Item *nn = seg->as_relation("SylStructure"); - if (nn->prev() || parent(nn)->prev()) + if (iprev(nn) || iprev(parent(nn))) return FALSE; else return TRUE; @@ -193,7 +193,7 @@ if (word_initial(seg)) { EST_Item *nn = parent(parent(seg,"SylStructure")); - if (as(nn,"Phrase")->prev()) + if (iprev(as(nn,"Phrase"))) return FALSE; else return TRUE; @@ -331,10 +331,10 @@ { if (syl_final(seg)) return 1.2; - s_next = seg->next(); + s_next = inext(seg); if ((s_next) && (syl_final(s_next))) return sub_rule9a(s_next->name()); - s_next_next = s_next->next(); + s_next_next = inext(s_next); if ((ph_is_sonorant(s_next->name())) && (s_next_next) && (ph_is_obstruent(s_next_next->name()))) @@ -346,7 +346,7 @@ { if 
(syl_final(seg)) return 1.2; - s_next = seg->next(); + s_next = inext(seg); if (ph_is_obstruent(s_next->name())) return sub_rule9a(s_next->name()); } @@ -386,22 +386,22 @@ { if (ph_is_vowel(seg->name())) { - if (ph_is_vowel(seg->next()->name())) + if (ph_is_vowel(inext(seg)->name())) return 1.20; else if ((!phrase_initial(seg)) && - (ph_is_vowel(seg->prev()->name()))) + (ph_is_vowel(iprev(seg)->name()))) return 0.70; else return 1.0; } - else if (ph_is_consonant(seg->next()->name())) + else if (ph_is_consonant(inext(seg)->name())) if (!phrase_initial(seg) && - (ph_is_consonant(seg->prev()->name()))) + (ph_is_consonant(iprev(seg)->name()))) return 0.5; else return 0.7; else if (!phrase_initial(seg) && - (ph_is_consonant(seg->prev()->name()))) + (ph_is_consonant(iprev(seg)->name()))) return 0.7; } diff -Nru festival-2.4~release/src/modules/hts_engine/fest2hts_engine.cc festival-2.5.0/src/modules/hts_engine/fest2hts_engine.cc --- festival-2.4~release/src/modules/hts_engine/fest2hts_engine.cc 2013-02-18 15:10:52.000000000 +0000 +++ festival-2.5.0/src/modules/hts_engine/fest2hts_engine.cc 2017-09-04 15:54:08.000000000 +0000 @@ -169,7 +169,7 @@ r->load(get_param_str("-od", hts_output_params, "tmp.lab"), "htk"); for (o = r->first(), s = u->relation("Segment")->first(); - (o != NULL) && (s != NULL); o = o->next(), s = s->next()) + (o != NULL) && (s != NULL); o = inext(o), s = inext(s)) if (o->S("name").before("+").after("-").matches(s->S("name"))) s->set("end", o->F("end")); else @@ -188,7 +188,7 @@ void HTS_get_copyright(char *str) { int i, nCopyright = HTS_NCOPYRIGHT; char url[] = HTS_URL, version[] = HTS_VERSION; - char *copyright[] = { HTS_COPYRIGHT }; + const char *copyright[] = { HTS_COPYRIGHT }; sprintf(str, "\nThe HMM-Based Speech Synthesis Engine \"hts_engine API\"\n"); diff -Nru festival-2.4~release/src/modules/Intonation/duffint.cc festival-2.5.0/src/modules/Intonation/duffint.cc --- festival-2.4~release/src/modules/Intonation/duffint.cc 2004-09-29 
08:56:56.000000000 +0000 +++ festival-2.5.0/src/modules/Intonation/duffint.cc 2017-09-04 15:54:08.000000000 +0000 @@ -69,7 +69,7 @@ return utt; add_target(u,seg->first(),0,start); - s = seg->last(); + s = seg->rlast(); add_target(u,s,(float)ffeature(s,"segment_end"),end); return utt; diff -Nru festival-2.4~release/src/modules/Intonation/gen_int.cc festival-2.5.0/src/modules/Intonation/gen_int.cc --- festival-2.4~release/src/modules/Intonation/gen_int.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Intonation/gen_int.cc 2017-09-04 15:54:08.000000000 +0000 @@ -74,7 +74,7 @@ targrel = u->create_relation("Target"); - for (s=u->relation("Syllable")->first(); s != 0 ; s=s->next()) + for (s=u->relation("Syllable")->first(); s != 0 ; s=inext(s)) { targets = leval(cons(tfunc,cons(utt,cons(siod(s),NIL))),NIL); @@ -99,7 +99,7 @@ // this is not very efficient EST_Item *seg; - for (seg=u->relation("Segment")->first(); seg != 0;seg=seg->next()) + for (seg=u->relation("Segment")->first(); seg != 0;seg=inext(seg)) { if (seg->F("end") >= pos) return seg; @@ -117,7 +117,7 @@ EST_Item *t; float l = 0.0; - for (t=u->relation("Target")->first_leaf(); t != 0;t=next_leaf(t)) + for (t=first_leaf(u->relation("Target")->first()); t != 0;t=next_leaf(t)) { if (t->F("pos") < l) { diff -Nru festival-2.4~release/src/modules/Intonation/int_aux.cc festival-2.5.0/src/modules/Intonation/int_aux.cc --- festival-2.4~release/src/modules/Intonation/int_aux.cc 2005-03-23 10:03:53.000000000 +0000 +++ festival-2.5.0/src/modules/Intonation/int_aux.cc 2017-09-04 15:54:08.000000000 +0000 @@ -53,7 +53,7 @@ // Check time is NOT the same as the last target, as this causes problems... 
float last_time; - EST_Item* last_item = u->relation(Targetname)->last_leaf(); + EST_Item* last_item = last_leaf(u->relation(Targetname)->first()); if (last_item) last_time = last_item->f("pos"); else @@ -95,10 +95,10 @@ EST_Item *s; int i; - f0.resize(int(ceil(targ.last_leaf()->F("pos",0) / shift)), 1); + f0.resize(int(ceil(last_leaf(targ.first())->F("pos",0) / shift)), 1); f0.fill_time(shift); - s = targ.first_leaf(); + s = first_leaf(targ.first()); // fill with zeros until first target; for (i = 0; i < f0.num_frames(); ++i) diff -Nru festival-2.4~release/src/modules/Intonation/int_tree.cc festival-2.5.0/src/modules/Intonation/int_tree.cc --- festival-2.4~release/src/modules/Intonation/int_tree.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Intonation/int_tree.cc 2017-09-04 15:54:08.000000000 +0000 @@ -98,7 +98,7 @@ accent_tree = siod_get_lval("int_accent_cart_tree","no accent tree"); - for (s=u->relation("Syllable")->first(); s != 0; s=s->next()) + for (s=u->relation("Syllable")->first(); s != 0; s=inext(s)) { if ((paccent = accent_specified(s)) == "0") // check if pre-specified paccent = (EST_String)wagon_predict(s,accent_tree); @@ -119,7 +119,7 @@ endtone_tree = siod_get_lval("int_tone_cart_tree","no tone cart tree"); - for (s=u->relation("Syllable")->first(); s != 0; s=s->next()) + for (s=u->relation("Syllable")->first(); s != 0; s=inext(s)) { if ((ptone = tone_specified(s)) == "0") ptone = (EST_String)wagon_predict(s,endtone_tree); @@ -149,7 +149,7 @@ if (ffeature(s,"stress") == "1") { // only goes on first stressed syllable EST_Item *p; - for (p=as(s,"SylStructure")->prev(); p != 0; p=p->prev()) + for (p=iprev(as(s,"SylStructure")); p != 0; p=iprev(p)) if (ffeature(s,"stress") == "1") return "NONE"; // specified but not on this syllable return paccent; // first stressed syl in word @@ -178,7 +178,7 @@ if (ptone == "0") return ptone; } - if (ss->next() == 0) // final syllable in word + if (inext(ss) == 0) // final syllable in word 
return ptone; else return "NONE"; // pre-specified but inappropriate syllable in word @@ -203,7 +203,7 @@ EST_FVector feats; feats.resize(siod_llength(start_lr)); - for (s=u->relation("Syllable")->first(); s != 0; s=s->next()) + for (s=u->relation("Syllable")->first(); s != 0; s=inext(s)) { find_feat_values(s,start_lr,feats); pstart = apply_lr_model(start_lr,feats); @@ -250,7 +250,7 @@ EST_FVector feats; feats.resize(siod_llength(start_lr)); - for (s=u->relation("Syllable")->first(); s != 0; s=s->next()) + for (s=u->relation("Syllable")->first(); s != 0; s=inext(s)) { find_feat_values(s,start_lr,feats); pstart = apply_lr_model(start_lr,feats); @@ -376,13 +376,13 @@ { // TRUE if segment immediately previous to this is a silence EST_Item *p; - if (s->prev() == 0) + if (iprev(s) == 0) return TRUE; EST_Item *ss = s->as_relation("SylStructure"); - if (s->prev() == ss->prev()) + if (iprev(s) == iprev(ss)) return FALSE; - p = daughter1(ss)->as_relation("Segment")->prev(); + p = iprev(daughter1(ss)->as_relation("Segment")); if (p == 0) return TRUE; else if (ph_is_silence(p->name())) @@ -394,10 +394,10 @@ static int before_pause(EST_Item *s) { // TRUE is segment immediately after this is a silence - if (s->next() == 0) + if (inext(s) == 0) return TRUE; EST_Item *ss = s->as_relation("SylStructure"); - EST_Item *n = daughtern(ss)->as_relation("Segment")->next(); + EST_Item *n = inext(daughtern(ss)->as_relation("Segment")); if (ph_is_silence(n->name())) return TRUE; else @@ -409,7 +409,7 @@ // return related to vowel segment EST_Item *p; - for (p=daughter1(syl,"SylStructure"); p != 0; p=p->next()) + for (p=daughter1(syl,"SylStructure"); p != 0; p=inext(p)) if (ph_is_vowel(p->name())) return p; diff -Nru festival-2.4~release/src/modules/Intonation/simple.cc festival-2.5.0/src/modules/Intonation/simple.cc --- festival-2.4~release/src/modules/Intonation/simple.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Intonation/simple.cc 2017-09-04 15:54:08.000000000 
+0000 @@ -59,7 +59,7 @@ u->create_relation("IntEvent"); u->create_relation("Intonation"); - for (s=u->relation("Syllable")->first(); s != 0; s = s->next()) + for (s=u->relation("Syllable")->first(); s != 0; s = inext(s)) { paccent = wagon_predict(s,accent_tree); if (paccent != "NONE") @@ -89,7 +89,7 @@ u->create_relation("Target"); - for (p=u->relation("Phrase")->first(); p != 0 ; p=p->next()) + for (p=u->relation("Phrase")->first(); p != 0 ; p=inext(p)) { baseline = f0_mean + (f0_std * 0.6); start = ffeature(p,"R:Phrase.daughter1.word_start"); @@ -103,8 +103,8 @@ add_target(u,daughter1(start_syl,"SylStructure"), ffeature(start_syl,"R:SylStructure.daughter1.segment_start"), baseline); - for (s=start_syl->as_relation("Syllable"); s != end_syl->next(); - s = s->next()) + for (s=start_syl->as_relation("Syllable"); s != inext(end_syl); + s = inext(s)) { if (ffeature(s,"accented") == 1) add_targets(u,s,baseline,f0_std); @@ -131,7 +131,7 @@ add_target(u,first_seg,ffeature(first_seg,"segment_start"),baseline); vowel_seg = end_seg; // by default - for (t = first_seg; t != 0; t = t->next()) + for (t = first_seg; t != 0; t = inext(t)) if (ph_is_vowel(t->name())) { vowel_seg = t; diff -Nru festival-2.4~release/src/modules/Lexicon/lex_ff.cc festival-2.5.0/src/modules/Lexicon/lex_ff.cc --- festival-2.4~release/src/modules/Lexicon/lex_ff.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Lexicon/lex_ff.cc 2017-09-04 15:54:08.000000000 +0000 @@ -80,7 +80,7 @@ // returns the next content word after s EST_Item *p; - for (p=s->as_relation("Word")->next(); p != 0; p = p->next()) + for (p=inext(s->as_relation("Word")); p != 0; p = inext(p)) { if (ff_word_gpos(p) == "content") return EST_Val(p->name()); @@ -95,7 +95,7 @@ int count = 0; EST_Item *p; - for (p=s->as_relation("Word")->next(); p != 0; p = p->next()) + for (p=inext(s->as_relation("Word")); p != 0; p = inext(p)) { if (ff_word_gpos(p) == "content") { @@ -113,7 +113,7 @@ // returns the previous content word 
after s EST_Item *p; - for (p=s->as_relation("Word")->prev(); p != 0; p = p->prev()) + for (p=iprev(s->as_relation("Word")); p != 0; p = iprev(p)) if (ff_word_gpos(p) == "content") return EST_Val(p->name()); @@ -126,7 +126,7 @@ int count = 0; EST_Item *p; - for (p=s->as_relation("Word")->prev(); p != 0; p = p->prev()) + for (p=iprev(s->as_relation("Word")); p != 0; p = iprev(p)) { if (ff_word_gpos(p) == "content") { @@ -145,7 +145,7 @@ EST_Item *p; int pos=0; - for (p=nn->next(); p; p=p->next()) + for (p=inext(nn); p; p=inext(p)) { if (ff_word_gpos(p) == "content") pos++; @@ -160,7 +160,7 @@ EST_Item *p; int pos=0; - for (p=nn->prev(); p; p=p->prev()) + for (p=iprev(nn); p; p=iprev(p)) { if (ff_word_gpos(p) == "content") pos++; @@ -192,7 +192,7 @@ int vox=FALSE; int sonorant=FALSE; - for (p=daughter1(nn); p->next() != 0; p=p->next()) + for (p=daughter1(nn); inext(p) != 0; p=inext(p)) { if (ph_is_vowel(p->name())) break; @@ -224,16 +224,16 @@ int vox=FALSE; int sonorant=FALSE; - for (p=daughter1(nn); p->next() != 0; p=p->next()) + for (p=daughter1(nn); inext(p) != 0; p=inext(p)) { if (ph_is_vowel(p->name())) break; } - if (p->next() == 0) // empty coda + if (inext(p) == 0) // empty coda return EST_Val("+S"); - for (p=p->next(); p != 0; p=p->next()) + for (p=inext(p); p != 0; p=inext(p)) { if (ph_is_voiced(p->name())) vox = TRUE; diff -Nru festival-2.4~release/src/modules/Makefile festival-2.5.0/src/modules/Makefile --- festival-2.4~release/src/modules/Makefile 2010-02-26 00:20:38.000000000 +0000 +++ festival-2.5.0/src/modules/Makefile 2017-09-04 15:54:08.000000000 +0000 @@ -39,7 +39,7 @@ DIRNAME=src/modules CPPSRCS = BASE_DIRS = Lexicon base Duration Intonation Text \ - UniSyn donovan parser UniSyn_diphone + UniSyn donovan parser UniSyn_diphone # these last four are potentially optional LIB_BUILD_DIRS = $(BASE_DIRS) diff -Nru festival-2.4~release/src/modules/MultiSyn/DiphoneBackoff.cc festival-2.5.0/src/modules/MultiSyn/DiphoneBackoff.cc --- 
festival-2.4~release/src/modules/MultiSyn/DiphoneBackoff.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/DiphoneBackoff.cc 2017-09-04 15:54:08.000000000 +0000 @@ -138,78 +138,78 @@ int DiphoneBackoff::backoff(EST_Item *p1) { - EST_Item *p2, *pp, *pps; - EST_String n1,n2,head,sub,full_sub,bo; + EST_Item *p2, *pp, *pps; + EST_String n1,n2,head,sub,full_sub,bo; - bool done = false; - EST_Litem *p; + bool done = false; + EST_Litem *p; - if(! p1) - EST_error("Backoff received null item."); - if ( ! (p2 = p1->next()) ) - EST_error("Backoff didn't get passed a diphone."); - - n1=p1->S("name"); - n2=p2->S("name"); - - p = backofflist.head(); - // for each rule. - while( p!= 0 && !done ) + if(! p1) + EST_error("Backoff received null item."); + if ( ! (p2 = inext(p1)) ) + EST_error("Backoff didn't get passed a diphone."); + + n1=p1->S("name"); + n2=p2->S("name"); + + p = backofflist.head(); + // for each rule. + while( p!= 0 && !done ) { - int i = 0 ; - head = backofflist(p).nth(i++); + int i = 0 ; + head = backofflist(p).nth(i++); - pp = 0; + pp = 0; - // Match head of rule to left phone, or if head of the rule is the defualt substitution - // do it if it hasn't already been done. - if( (head == n1) || ( (head == default_match) && !is_defaultbackoff(p1) ) ) - pp = p1; - // if this fails, try the right phone. - else if( (head == n2) || ( (head == default_match) && !is_defaultbackoff(p2) ) ) - pp = p2; + // Match head of rule to left phone, or if head of the rule is the defualt substitution + // do it if it hasn't already been done. + if( (head == n1) || ( (head == default_match) && !is_defaultbackoff(p1) ) ) + pp = p1; + // if this fails, try the right phone. 
+ else if( (head == n2) || ( (head == default_match) && !is_defaultbackoff(p2) ) ) + pp = p2; - if(pp) + if(pp) { - bo = pp->S("name"); - sub = backofflist(p).nth(i++); - full_sub = sub; - - pp->set("name",sub); - set_backoff(pp); - if(head.matches(default_match)) - set_defaultbackoff(pp); - while (i < backofflist(p).length()) + bo = pp->S("name"); + sub = backofflist(p).nth(i++); + full_sub = sub; + + pp->set("name",sub); + set_backoff(pp); + if(head.matches(default_match)) + set_defaultbackoff(pp); + while (i < backofflist(p).length()) { - sub = backofflist(p).nth(i++); - full_sub = EST_String::cat(full_sub," ",sub); - pp->insert_after(); - pps = pp->as_relation("SylStructure"); - pp = pp->next(); - // insert in SylStructure as well. - pps->insert_after(pp); - - pp->set("name",sub); - set_backoff(pp); - if(head.matches(default_match)) - set_defaultbackoff(pp); + sub = backofflist(p).nth(i++); + full_sub = EST_String::cat(full_sub," ",sub); + pp->insert_after(); + pps = pp->as_relation("SylStructure"); + pp = inext(pp); + // insert in SylStructure as well. + pps->insert_after(pp); + + pp->set("name",sub); + set_backoff(pp); + if(head.matches(default_match)) + set_defaultbackoff(pp); } - EST_warning("Missing diphone: %s_%s. Changing %s to %s.\n", - (const char *)n1, - (const char *)n2, - (const char *)bo, - (const char *)full_sub); - done = true; + EST_warning("Missing diphone: %s_%s. 
Changing %s to %s.\n", + (const char *)n1, + (const char *)n2, + (const char *)bo, + (const char *)full_sub); + done = true; } - p = p->next(); + p = p->next(); } - if (done) - return 0; - else - return 1; + if (done) + return 0; + else + return 1; } diff -Nru festival-2.4~release/src/modules/MultiSyn/DiphoneUnitVoice.cc festival-2.5.0/src/modules/MultiSyn/DiphoneUnitVoice.cc --- festival-2.4~release/src/modules/MultiSyn/DiphoneUnitVoice.cc 2014-12-11 15:24:37.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/DiphoneUnitVoice.cc 2017-09-04 15:54:08.000000000 +0000 @@ -86,7 +86,7 @@ // the unit linked list *may be* shorter that the segment list. //(admittedly could cause confusion) - for( s=source_lab.head(), u=diphone_stream.head(); (u!=0)&&(s!=0); u=u->next(), s=s->next()){ + for( s=source_lab.head(), u=diphone_stream.head(); (u!=0)&&(s!=0); u=inext(u), s=inext(s)){ EST_Track *pm = track(u->f("coefs")); int end_frame = pm->num_frames() - 1; @@ -101,7 +101,7 @@ u->set("end", p_time); if( u->f_present("extendRight") ){//because diphone squeezed out (see above) - s = s->next(); + s = inext(s); s->set("end", p_time ); } } @@ -125,7 +125,9 @@ const EST_String& uttExt, const EST_String& wavExt, const EST_String& pmExt, - const EST_String& coefExt ) + const EST_String& JCCoefExt, + const EST_String& TCCoefExt ) + : pruning_beam( -1 ), ob_pruning_beam( -1 ), tc_rescoring_beam( -1 ), @@ -147,7 +149,7 @@ // make the default voice module with the supplied parameters addVoiceModule( basenames, uttDir, wavDir, pmDir, coefDir, wav_srate, - uttExt, wavExt, pmExt, coefExt ); + uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt ); diphone_backoff_rules = 0; } @@ -175,7 +177,8 @@ const EST_String& uttExt, const EST_String& wavExt, const EST_String& pmExt, - const EST_String& coefExt ) + const EST_String& JCCoefExt, + const EST_String& TCCoefExt ) { DiphoneVoiceModule *vm; @@ -186,7 +189,7 @@ vm = new DiphoneVoiceModule( basenames, uttDir, wavDir, pmDir, coefDir, srate, - uttExt, 
wavExt, pmExt, coefExt ); + uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt ); CHECK_PTR(vm); registerVoiceModule( vm ); @@ -276,7 +279,7 @@ { EST_Item *it=units->tail(); - for ( ; path != 0 && it != 0; path=path->from, it=it->prev() ){ + for ( ; path != 0 && it != 0; path=path->from, it=iprev(it) ){ EST_Track *coefs = new EST_Track; CHECK_PTR(coefs); EST_Wave *sig = new EST_Wave; @@ -331,7 +334,7 @@ // np->score = c->score; // else{ // // join cost between right edge of left diphone and vice versa -// np->score = p->score + c->score + jcost( p->c->s->next(), c->s ); +// np->score = p->score + c->score + jcost( inext(p->c->s), c->s ); // } // return np; // } @@ -474,10 +477,10 @@ EST_error( "Segment relation is empty" ); bool extendLeftFlag = false; - for( ; it->next(); it=it->next() ) + for( ; inext(it); it=inext(it) ) { EST_String l = it->S("name"); - EST_String r = it->next()->S("name"); + EST_String r = inext(it)->S("name"); EST_String diphone_name = EST_String::cat(l,"_",r); EST_String orig = diphone_name; @@ -500,19 +503,19 @@ if((s1 = parent(it,"SylStructure"))) w1= parent(s1,"SylStructure"); - if( (s2 = parent(it->next(),"SylStructure"))) + if( (s2 = parent(inext(it),"SylStructure"))) w2= parent(s2,"SylStructure"); if( w1 && w2 && (w1 != w2) ) { EST_Item *sil; - cerr << " Interword so inseting silence.\n"; + cerr << " Interword so inserting silence.\n"; sil = it->insert_after(); sil->set("name",ph_silence()); - r = it->next()->S("name"); + r = inext(it)->S("name"); diphone_name = EST_String::cat(l,"_",r); } @@ -553,7 +556,7 @@ // while(!this->unitAvailable(diphone_name) && // diphone_backoff_rules && // !diphone_backoff_rules->backoff(it)) - // diphone_name = EST_String::cat(it->S("name"),"_",it->next()->S("name")); + // diphone_name = EST_String::cat(it->S("name"),"_",inext(it)->S("name")); if( !this->unitAvailable( diphone_name ) ){ missing_diphones.append( diphone_name ); @@ -576,7 +579,7 @@ // stop if necessary units are still missing. 
if( missing_diphones.length() > 0 ){ - for( EST_Litem *it=missing_diphones.head(); it!=0 ; it=it->next() ) + for( EST_Litem *it=missing_diphones.head(); it!=0 ; it=it->next() ) printf( "requested diphone missing: %s\n", missing_diphones(it).str() ); EST_warning("Making phone joins to compensate..."); @@ -796,8 +799,8 @@ EST_error( "Segment relation is empty" ); else{ ph2 = it->S("name"); - while( ((it=it->prev())!=0) && - ((db_utt_seg_it=db_utt_seg_it->prev())!=0) ){ + while( ((it=iprev(it))!=0) && + ((db_utt_seg_it=iprev(db_utt_seg_it))!=0) ){ EST_Track *coefs = new EST_Track; CHECK_PTR(coefs); EST_Wave *sig = new EST_Wave; @@ -825,7 +828,7 @@ my_parse_diphone_times( *units, *segs ); // this is for copy synthesis, so copy actual timings - //for( EST_Item *seg = segs->head(); it!=0; it=it->next() ) + //for( EST_Item *seg = segs->head(); it!=0; it=inext(it) ) //seg->set( "end", seg->F("source_end") ); } } @@ -898,3 +901,16 @@ delete l; } } + +void DiphoneUnitVoice::fill_target_coefficients(EST_Utterance *utt, EST_Track *tcCoefs) +{ + + EST_Relation *segs = utt->relation( "Segment" ); + + // Assume that the first voice module can provide this method. 
+ EST_TList::Entries it; + it.begin( voiceModules ); + + (*it)->addTCoefficients(segs, *tcCoefs); +} + diff -Nru festival-2.4~release/src/modules/MultiSyn/DiphoneUnitVoice.h festival-2.5.0/src/modules/MultiSyn/DiphoneUnitVoice.h --- festival-2.4~release/src/modules/MultiSyn/DiphoneUnitVoice.h 2007-04-23 14:47:40.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/DiphoneUnitVoice.h 2017-09-04 15:54:08.000000000 +0000 @@ -87,7 +87,8 @@ const EST_String& uttExt = ".utt", const EST_String& wavExt = ".wav", const EST_String& pmExt = ".pm", - const EST_String& coefExt = ".coef" ); + const EST_String& JCCoefExt = ".coef", + const EST_String& TCCoefExt = ".tcoef" ); virtual ~DiphoneUnitVoice(); @@ -121,7 +122,8 @@ const EST_String& uttExt = ".utt", const EST_String& wavExt = ".wav", const EST_String& pmExt = ".pm", - const EST_String& coefExt = ".coef" ); + const EST_String& JCCoefExt = ".coef", + const EST_String& TCCoefExt = ".tcoef" ); // assume responsibility to delete vm when done with it @@ -171,6 +173,9 @@ void precomputeJoinCosts( const EST_StrList &phones, bool verbose=true ); + void fill_target_coefficients(EST_Utterance *utt, EST_Track *tcCoefs); + + private: // don't allow copying of Voices (for now?) 
DiphoneUnitVoice( const DiphoneUnitVoice& ); diff -Nru festival-2.4~release/src/modules/MultiSyn/DiphoneVoiceModule.cc festival-2.5.0/src/modules/MultiSyn/DiphoneVoiceModule.cc --- festival-2.4~release/src/modules/MultiSyn/DiphoneVoiceModule.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/DiphoneVoiceModule.cc 2017-09-04 15:54:08.000000000 +0000 @@ -104,7 +104,8 @@ const EST_String& uttExt, const EST_String& wavExt, const EST_String& pmExt, - const EST_String& coefExt ) + const EST_String& JCCoefExt, + const EST_String& TCCoefExt ) : fileList( basenames ), utt_dir ( uttDir ), @@ -112,7 +113,8 @@ pm_dir( pmDir ), pm_ext( pmExt ), coef_dir( coefDir ), - coef_ext( coefExt ), + JCCoef_ext( JCCoefExt ), + TCCoef_ext( TCCoefExt ), wave_dir( wavDir ), wave_ext( wavExt ), wav_srate( sr ), @@ -123,58 +125,123 @@ } -void DiphoneVoiceModule::addCoefficients( EST_Relation *segs, const EST_Track& coefs ) + +void DiphoneVoiceModule::addTCoefficients( EST_Relation *segs, const EST_Track& coefs ) { - float startt, midt, endt; - EST_FVector *startf, *midf, *endf; - const int num_coefs = coefs.num_channels(); - - // hack to avoid overhead of string creation and deletion - // (EST feature access should really be changed to take - // const char* instead of const EST_String& ) - static const EST_String startcoef_str("startcoef"); - static const EST_String midcoef_str("midcoef"); - static const EST_String endcoef_str("endcoef"); - static const EST_String start_str("start"); - - EST_Item *seg=segs->head(); - startt = seg->F(start_str); - - startf = new EST_FVector(num_coefs); - CHECK_PTR(startf); - coefs.copy_frame_out(coefs.index(startt), *startf); //this one not shared + // This currently copies 4 frames into the segment. + // The first at the start time, second 0.25 through the segment, etc... + + float t0, t5, tstep; + EST_FVector *f; + const int num_coefs = coefs.num_channels(); + + // Feature names. 
There is also a copy of these in EST_HybridTargetCost.cc + static const EST_String start_str("start"); + static const EST_String ll_str("target_ll"); + static const EST_String l_str("target_l"); + static const EST_String r_str("target_r"); + static const EST_String rr_str("target_rr"); + + EST_Item *seg=segs->head(); - for( ; seg!=0; seg=seg->next() ){ - - // work out boundary for diphone join - midt = getJoinTime( seg ); - - // copy frames out and set as features - seg->features().set_val( startcoef_str, est_val(startf) ); + for( ; seg!=0; seg=inext(seg) ) + { + if(iprev(seg)) + t0 = iprev(seg)->features().val("end").Float(); + else + t0 = 0.0; + t5 = seg->features().val("end").Float(); + tstep = (t5 - t0)/4; + + + //cout << "Processing phone: " << seg->name() << endl; + + f = new EST_FVector(num_coefs); + CHECK_PTR(f); + coefs.copy_frame_out(coefs.index(t0), *f); + seg->features().set_val( ll_str, est_val(f) ); + //cout << " ll: " << t0 << endl; + //f->est_save("-","est_ascii"); + + + f = new EST_FVector(num_coefs); + CHECK_PTR(f); + coefs.copy_frame_out(coefs.index(t0 + tstep), *f); + seg->features().set_val( l_str, est_val(f) ); + //cout << " l: " << t0 + tstep << endl; + //f->est_save("-","est_ascii"); + + + f = new EST_FVector(num_coefs); + CHECK_PTR(f); + coefs.copy_frame_out(coefs.index(t0 + 2.0*tstep), *f); + seg->features().set_val( r_str, est_val(f) ); + //cout << " r: " << t0 + 2.0*tstep << endl; + //f->est_save("-","est_ascii"); + + f = new EST_FVector(num_coefs); + CHECK_PTR(f); + coefs.copy_frame_out(coefs.index(t0 + 3.0*tstep), *f); + seg->features().set_val( rr_str, est_val(f) ); + //cout << " rr: " << t0 + 3.0*tstep << endl; + //f->est_save("-","est_ascii"); + + } +} - midf = new EST_FVector(num_coefs); - CHECK_PTR(midf); - coefs.copy_frame_out(coefs.index(midt), *midf); - seg->features().set_val( midcoef_str, est_val(midf) ); - - endt = seg->features().val("end").Float(); - endf = new EST_FVector(num_coefs); - CHECK_PTR(endf); - 
coefs.copy_frame_out(coefs.index(endt), *endf); - seg->features().set_val( endcoef_str, est_val(endf) ); + +void DiphoneVoiceModule::addJCoefficients( EST_Relation *segs, const EST_Track& coefs ) +{ + float startt, midt, endt; + EST_FVector *startf, *midf, *endf; + const int num_coefs = coefs.num_channels(); + + // hack to avoid overhead of string creation and deletion + // (EST feature access should really be changed to take + // const char* instead of const EST_String& ) + static const EST_String startcoef_str("startcoef"); + static const EST_String midcoef_str("midcoef"); + static const EST_String endcoef_str("endcoef"); + static const EST_String start_str("start"); + + EST_Item *seg=segs->head(); + startt = seg->F(start_str); + + startf = new EST_FVector(num_coefs); + CHECK_PTR(startf); + coefs.copy_frame_out(coefs.index(startt), *startf); //this one not shared + + for( ; seg!=0; seg=inext(seg) ) + { + // work out boundary for diphone join + midt = getJoinTime( seg ); + + // copy frames out and set as features + seg->features().set_val( startcoef_str, est_val(startf) ); + + midf = new EST_FVector(num_coefs); + CHECK_PTR(midf); + coefs.copy_frame_out(coefs.index(midt), *midf); + seg->features().set_val( midcoef_str, est_val(midf) ); + + endt = seg->features().val("end").Float(); + endf = new EST_FVector(num_coefs); + CHECK_PTR(endf); + coefs.copy_frame_out(coefs.index(endt), *endf); + seg->features().set_val( endcoef_str, est_val(endf) ); - startf = endf; // phones share frame at phone boundary (reference counted in EST_Val) - } + startf = endf; // phones share frame at phone boundary (reference counted in EST_Val) + } } void DiphoneVoiceModule::flatPack( EST_Relation *segs, const EST_TargetCost *tc ) const { - const EST_FlatTargetCost *ftc = (EST_FlatTargetCost *)tc; + const EST_FlatTargetCost *ftc = (EST_FlatTargetCost *)tc; - for( EST_Item *seg=segs->head(); seg->next() !=0; seg=seg->next() ) - tcdatahash->add_item(seg, ftc->flatpack(seg)); + for( EST_Item 
*seg=segs->head(); inext(seg) !=0; seg=inext(seg) ) + tcdatahash->add_item(seg, ftc->flatpack(seg)); } @@ -210,12 +277,26 @@ segs = u->relation( "Segment" ); // add join cost coefficients (at middle of phones) - EST_Track coefs; - if( (coefs.load((coef_dir+fileList(it)+coef_ext))) != read_ok ) + EST_Track JCCoefs; + if( (JCCoefs.load((coef_dir+fileList(it)+JCCoef_ext))) != read_ok ) EST_error( "Couldn't load data file %s", - (const char*) (coef_dir+fileList(it)+coef_ext) ); + (const char*) (coef_dir+fileList(it)+JCCoef_ext) ); + + addJCoefficients( segs, JCCoefs ); + + // Load target cost Coefficients if specified + if ( TCCoef_ext != EST_String::Empty ) { + + EST_Track TCCoefs; + + if( (TCCoefs.load((coef_dir+fileList(it)+TCCoef_ext))) != read_ok ) + EST_error( "Couldn't load data file %s", + (const char*) (coef_dir+fileList(it)+TCCoef_ext) ); - addCoefficients( segs, coefs ); + addTCoefficients( segs, TCCoefs ); + + } + if (tc->is_flatpack()) { @@ -255,78 +336,78 @@ void DiphoneVoiceModule::addToCatalogue( const EST_Utterance *utt, int *num_ignored, bool ignore_bad ) { - EST_Item *item, *next_item; - ItemList *diphoneList; - const EST_String *ph1, *ph2; - int found=0; - - static const EST_String bad_str( "bad" ); - - item = (utt->relation( "Segment" ))->tail(); - if( item!=0 ){ - ph2 = &(item->features().val("name").String()); - - while( (item=item->prev()) != 0 ){ - - next_item = item->next(); - - // You'd think we need to check both item->f_present(bad_str) and - // next_item->f_present(bad_str) like this: - //if((item->f_present(bad_str) || next_item->f_present(bad_str)) && ignore_bad == true){ - // But experiment showed that then each time one diphone too many would be - // ignored. 
This was partly compensated by a bug pesent up to r1.14 - // (a iteration within "if(item=item->prev()!=0)" just before the "continue") - // which caused the leftmost bad phone in a row of bad phones NOT to be ignored - // when the length of the row was even (or when it was odd and ended in the - // utterance-final phone, which is never checked for badness). - if(item->f_present(bad_str) && ignore_bad == true){ - - (*num_ignored)++; - - EST_warning( "Ignoring diphone \"%s_%s\" (LEFT %s in %s at %fs, bad flag \"%s\")", - item->S("name").str(), - next_item->S("name").str(), - item->S("name").str(), - utt->f.S("fileid").str(), - item->F("end"), - item->S("bad").str() ); + EST_Item *item, *next_item; + ItemList *diphoneList; + const EST_String *ph1, *ph2; + int found=0; + + static const EST_String bad_str( "bad" ); + + item = (utt->relation( "Segment" ))->tail(); + if( item!=0 ){ + ph2 = &(item->features().val("name").String()); + + while( (item=iprev(item)) != 0 ) + { + next_item = inext(item); + + // You'd think we need to check both item->f_present(bad_str) and + // next_item->f_present(bad_str) like this: + //if((item->f_present(bad_str) || next_item->f_present(bad_str)) && ignore_bad == true){ + // But experiment showed that then each time one diphone too many would be + // ignored. This was partly compensated by a bug pesent up to r1.14 + // (a iteration within "if(item=item->prev()!=0)" just before the "continue") + // which caused the leftmost bad phone in a row of bad phones NOT to be ignored + // when the length of the row was even (or when it was odd and ended in the + // utterance-final phone, which is never checked for badness). 
+ if(item->f_present(bad_str) && ignore_bad == true){ + + (*num_ignored)++; + + EST_warning( "Ignoring diphone \"%s_%s\" (LEFT %s in %s at %fs, bad flag \"%s\")", + item->S("name").str(), + next_item->S("name").str(), + item->S("name").str(), + utt->f.S("fileid").str(), + item->F("end"), + item->S("bad").str() ); - if(item->prev() != 0){ - continue; - } - else - break; //already at start of list, so finish up - } + if(iprev(item) != 0){ + continue; + } + else + break; //already at start of list, so finish up + } - ph1 = &(item->features().val("name").String()); + ph1 = &(item->features().val("name").String()); -// EST_warning( "Adding phone \"%s\" (%s, %f) to diphoneList %s_%s", -// item->S("name").str(), -// utt->f.S("fileid").str(), -// item->F("end"), -// item->S("name").str(), -// next_item->S("name").str()); + // EST_warning( "Adding phone \"%s\" (%s, %f) to diphoneList %s_%s", + // item->S("name").str(), + // utt->f.S("fileid").str(), + // item->F("end"), + // item->S("name").str(), + // next_item->S("name").str()); - diphoneList = catalogue->val(EST_String::cat(*ph1,"_",*ph2), found); + diphoneList = catalogue->val(EST_String::cat(*ph1,"_",*ph2), found); - if( !found ){ - diphoneList = new ItemList; - CHECK_PTR(diphoneList); - catalogue->add_item(EST_String::cat(*ph1,"_",*ph2), diphoneList, 1); // no_search=1 - } + if( !found ){ + diphoneList = new ItemList; + CHECK_PTR(diphoneList); + catalogue->add_item(EST_String::cat(*ph1,"_",*ph2), diphoneList, 1); // no_search=1 + } - diphoneList->append( item ); + diphoneList->append( item ); - ph2 = ph1; + ph2 = ph1; + } } - } } void DiphoneVoiceModule::getDiphone( const EST_Item *phone1, EST_Track* coef, EST_Wave* sig, int *midframe, bool extendLeft, bool extendRight ) const { - EST_Item *phone2 = phone1->next(); + EST_Item *phone2 = inext(phone1); // load the relevant parts const EST_String &fname = phone1->relation()->utt()->f.val("fileid").String(); @@ -435,7 +516,7 @@ EST_VTCandidate *c = new EST_VTCandidate; 
CHECK_PTR(c); - EST_Item *cand_ph2 = cand_ph1->next(); + EST_Item *cand_ph2 = inext(cand_ph1); // set up all the members we can here c->s = const_cast(cand_ph1); @@ -446,7 +527,7 @@ else left = fvector( cand_ph1->features().val( "midcoef" ) ); - if( target_ph1->next()->f_present( extendRight_str ) ) + if( inext(target_ph1)->f_present( extendRight_str ) ) right = fvector( cand_ph2->features().val( "endcoef" ) ); else right = fvector( cand_ph2->features().val( "midcoef" ) ); @@ -554,7 +635,7 @@ if( utt_dbase != 0 ){ for( EST_Litem *it=utt_dbase->head(); it!=0 ; it=it->next() ){ EST_Item *ph=(*utt_dbase)(it)->relation("Segment")->head(); - for( ; ph!=0; ph=ph->next() ){ + for( ; ph!=0; ph=inext(ph) ){ if( ph->S("name") == phone ){ list.append( ph ); n++; diff -Nru festival-2.4~release/src/modules/MultiSyn/DiphoneVoiceModule.h festival-2.5.0/src/modules/MultiSyn/DiphoneVoiceModule.h --- festival-2.4~release/src/modules/MultiSyn/DiphoneVoiceModule.h 2006-08-16 16:34:46.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/DiphoneVoiceModule.h 2017-09-04 15:54:08.000000000 +0000 @@ -113,7 +113,8 @@ const EST_String& uttExt = ".utt", const EST_String& wavExt = ".wav", const EST_String& pmExt = ".pm", - const EST_String& coefExt = ".coef" ); + const EST_String& JCCoefExt = ".coef", + const EST_String& TCCoefExt = ".tcoef" ); virtual ~DiphoneVoiceModule(); @@ -165,7 +166,8 @@ // Flatpack void flatPack( EST_Relation *segs, const EST_TargetCost *tc) const; - void addCoefficients( EST_Relation *segs, const EST_Track& coefs ); + void addTCoefficients( EST_Relation *segs, const EST_Track& coefs ); + void addJCoefficients( EST_Relation *segs, const EST_Track& coefs ); void addToCatalogue( const EST_Utterance *utt, int *num_ignored, bool ignore_bad=false ); void getDiphone( const EST_Item *phone1, EST_Track* coef, EST_Wave* sig, int* midframe, @@ -180,7 +182,8 @@ EST_String pm_dir; // pitch marks EST_String pm_ext; EST_String coef_dir; // for coefficients that aren't pitch 
syncronous - EST_String coef_ext; + EST_String JCCoef_ext; + EST_String TCCoef_ext; EST_String wave_dir; // waveform (or residual) EST_String wave_ext; diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_DiphoneCoverage.cc festival-2.5.0/src/modules/MultiSyn/EST_DiphoneCoverage.cc --- festival-2.4~release/src/modules/MultiSyn/EST_DiphoneCoverage.cc 2005-05-11 15:10:07.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_DiphoneCoverage.cc 2017-09-04 15:54:08.000000000 +0000 @@ -74,40 +74,44 @@ void EST_DiphoneCoverage::add_stats(const EST_Utterance *utt) { - EST_Relation *segs = utt->relation("Segment"); - EST_Item *it=segs->head(); + EST_Relation *segs = utt->relation("Segment"); + EST_Item *it=segs->head(); - for( ; it->next(); it=it->next() ) - if(it->next()) - { - EST_String key = - EST_String::cat(get_diphone_name(it),"-", - get_stress_name(get_stress_index(it)),"-", - get_syl_pos_name(get_syl_pos_index(it))); - int val = 0; - if (strhash.present(key)) - { - val = strhash.val(key); - strhash.remove_item(key); - } - ++val; - strhash.add_item(key,val); - } + for( ; inext(it); it=inext(it) ) + { + if(inext(it)) + { + EST_String key = + EST_String::cat(get_diphone_name(it),"-", + get_stress_name(get_stress_index(it)),"-", + get_syl_pos_name(get_syl_pos_index(it))); + int val = 0; + if (strhash.present(key)) + { + val = strhash.val(key); + strhash.remove_item(key); + } + ++val; + strhash.add_item(key,val); + } + } } void EST_DiphoneCoverage::print_stats(const EST_String filename) { - ostream *outf; + ostream *outf; - if (filename == "-") - outf = &cout; - else - outf = new ofstream(filename); + if (filename == "-") + outf = &cout; + else + outf = new ofstream(filename); - EST_THash::Entries them; + EST_THash::Entries them; - for(them.begin(strhash); them; them++) - *outf << them->k << " " << them->v << "\n"; + for (them.begin(strhash); them; them++) + { + *outf << them->k << " " << them->v << "\n"; + } if (outf != &cout) delete outf; @@ -119,23 +123,23 
@@ static EST_String get_diphone_name(const EST_Item *seg1) { - return EST_String::cat(seg1->S("name"),"_",seg1->next()->S("name")); + return EST_String::cat(seg1->S("name"),"_",inext(seg1)->S("name")); } static int get_stress_index(const EST_Item *seg1) { - int i1 = 0, i2=0; + int i1 = 0, i2=0; - if( ph_is_vowel(seg1->S("name")) && - !ph_is_silence(seg1->S("name")) ) - i1 = (parent(seg1,"SylStructure")->I("stress") > 0) ? 1 : 0; + if( ph_is_vowel(seg1->S("name")) && + !ph_is_silence(seg1->S("name")) ) + i1 = (parent(seg1,"SylStructure")->I("stress") > 0) ? 1 : 0; - if( ph_is_vowel(seg1->next()->S("name")) && - !ph_is_silence(seg1->next()->S("name")) ) - i2 = (parent(seg1->next(),"SylStructure")->I("stress") > 0) ? 1 : 0; + if( ph_is_vowel(inext(seg1)->S("name")) && + !ph_is_silence(inext(seg1)->S("name")) ) + i2 = (parent(inext(seg1),"SylStructure")->I("stress") > 0) ? 1 : 0; - return i2+2*i1; + return i2+2*i1; } static EST_String get_stress_name(const int index) @@ -148,9 +152,9 @@ int pos = TCPOS_MEDIAL; const EST_Item *syl = parent(seg1,"SylStructure"); - const EST_Item *next_syl = parent(seg1->next(),"SylStructure"); - const EST_Item *next_next_syl = parent(seg1->next()->next(),"SylStructure"); - const EST_Item *prev_syl = parent(seg1->prev(),"SylStructure"); + const EST_Item *next_syl = parent(inext(seg1),"SylStructure"); + const EST_Item *next_next_syl = parent(inext(inext(seg1)),"SylStructure"); + const EST_Item *prev_syl = parent(iprev(seg1),"SylStructure"); if( syl != next_syl ) pos = TCPOS_INTER; diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_FlatTargetCost.cc festival-2.5.0/src/modules/MultiSyn/EST_FlatTargetCost.cc --- festival-2.4~release/src/modules/MultiSyn/EST_FlatTargetCost.cc 2010-04-30 11:04:06.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_FlatTargetCost.cc 2017-09-04 15:54:08.000000000 +0000 @@ -76,231 +76,231 @@ TCData *EST_FlatTargetCost::flatpack(EST_Item *seg) const { - const EST_Item *syl, *nsyl, *nnsyl, *word; + 
const EST_Item *syl, *nsyl, *nnsyl, *word; - TCData *f =new TCData(TCHI_LAST); + TCData *f =new TCData(TCHI_LAST); - syl=tc_get_syl(seg); - nsyl=tc_get_syl(seg->next()); - if(seg->next()->next()) - nnsyl=tc_get_syl(seg->next()->next()); - else nnsyl = 0; + syl=tc_get_syl(seg); + nsyl=tc_get_syl(inext(seg)); + if(inext(inext(seg))) + nnsyl=tc_get_syl(inext(inext(seg))); + else nnsyl = 0; - // This segment features + // This segment features - //cout << "SEG: " << seg->S("name") << " is vowel: " - // << ph_is_vowel(seg->S("name")) << endl; + //cout << "SEG: " << seg->S("name") << " is vowel: " + // << ph_is_vowel(seg->S("name")) << endl; - if(ph_is_vowel(seg->S("name"))) - (*f)[VOWEL]=1; - else - (*f)[VOWEL]=0; + if(ph_is_vowel(seg->S("name"))) + (*f)[VOWEL]=1; + else + (*f)[VOWEL]=0; - //cout << "SEG: " << seg->S("name") << " is sil: " - // << ph_is_silence(seg->S("name")) << endl; + //cout << "SEG: " << seg->S("name") << " is sil: " + // << ph_is_silence(seg->S("name")) << endl; - if(ph_is_silence(seg->S("name"))) - (*f)[SIL]=1; - else - (*f)[SIL]=0; + if(ph_is_silence(seg->S("name"))) + (*f)[SIL]=1; + else + (*f)[SIL]=0; - if(seg->f_present("bad_dur")) - (*f)[BAD_DUR]=1; - else - (*f)[BAD_DUR]=0; + if(seg->f_present("bad_dur")) + (*f)[BAD_DUR]=1; + else + (*f)[BAD_DUR]=0; - if(seg->next()->f_present("bad_dur")) - (*f)[NBAD_DUR]=1; - else - (*f)[NBAD_DUR]=0; + if(inext(seg)->f_present("bad_dur")) + (*f)[NBAD_DUR]=1; + else + (*f)[NBAD_DUR]=0; - if(seg->f_present("bad_lex")) - (*f)[BAD_OOL]=1; - else - (*f)[BAD_OOL]=0; + if(seg->f_present("bad_lex")) + (*f)[BAD_OOL]=1; + else + (*f)[BAD_OOL]=0; - if(seg->next()->f_present("bad_lex")) - (*f)[NBAD_OOL]=1; - else - (*f)[NBAD_OOL]=0; + if(inext(seg)->f_present("bad_lex")) + (*f)[NBAD_OOL]=1; + else + (*f)[NBAD_OOL]=0; - (*f)[BAD_F0]=get_bad_f0(seg); + (*f)[BAD_F0]=get_bad_f0(seg); - // This segments syl features + // This segments syl features - if(syl) + if(syl) { - (*f)[SYL]=simple_id(syl->S("id")); - 
(*f)[SYL_STRESS]=syl->I("stress"); - //cout << "syl id: " << simple_id(syl->S("id")) - //<< " stress: " << syl->I("stress") << endl; + (*f)[SYL]=simple_id(syl->S("id")); + (*f)[SYL_STRESS]=syl->I("stress"); + //cout << "syl id: " << simple_id(syl->S("id")) + //<< " stress: " << syl->I("stress") << endl; } - else + else { - (*f)[SYL]=0; - (*f)[SYL_STRESS]=0; - //cout << "no syl present " << endl; + (*f)[SYL]=0; + (*f)[SYL_STRESS]=0; + //cout << "no syl present " << endl; } - // Next segment features + // Next segment features - //cout << "NSEG: " << seg->next()->S("name") << " is sil: " - // << ph_is_silence(seg->next()->S("name")) << endl; + //cout << "NSEG: " << seg->next()->S("name") << " is sil: " + // << ph_is_silence(seg->next()->S("name")) << endl; - if(ph_is_silence(seg->next()->S("name"))) - (*f)[N_SIL]=1; - else - (*f)[N_SIL]=0; + if(ph_is_silence(inext(seg)->S("name"))) + (*f)[N_SIL]=1; + else + (*f)[N_SIL]=0; - //cout << "NSEG: " << seg->next()->S("name") << " is vowel: " - // << ph_is_vowel(seg->next()->S("name")) << endl; + //cout << "NSEG: " << seg->next()->S("name") << " is vowel: " + // << ph_is_vowel(seg->next()->S("name")) << endl; - if(ph_is_vowel(seg->next()->S("name"))) - (*f)[N_VOWEL]=1; - else - (*f)[N_VOWEL]=0; + if(ph_is_vowel(inext(seg)->S("name"))) + (*f)[N_VOWEL]=1; + else + (*f)[N_VOWEL]=0; - // Next seg syl features - if(nsyl) + // Next seg syl features + if(nsyl) { - (*f)[NSYL]=simple_id(nsyl->S("id")); - (*f)[NSYL_STRESS]=nsyl->I("stress"); - //cout << "nsyl stress: " << nsyl->I("stress") << endl; + (*f)[NSYL]=simple_id(nsyl->S("id")); + (*f)[NSYL_STRESS]=nsyl->I("stress"); + //cout << "nsyl stress: " << nsyl->I("stress") << endl; } - else + else { - (*f)[NSYL]=0; - (*f)[NSYL_STRESS]=0; - //cout << "no nsyl: " << endl; + (*f)[NSYL]=0; + (*f)[NSYL_STRESS]=0; + //cout << "no nsyl: " << endl; } - if(seg->next()->next()) - { - //cout << "RC: " << seg->next()->next()->S("name") - //<< " " << simple_phone(seg->next()->next()->S("name")) - 
// << endl; - (*f)[RC]=simple_phone(seg->next()->next()->S("name")); - (*f)[NNBAD_DUR]=seg->next()->next()->f_present("bad_dur"); + if(inext(inext(seg))) + { + //cout << "RC: " << seg->next()->next()->S("name") + //<< " " << simple_phone(seg->next()->next()->S("name")) + // << endl; + (*f)[RC]=simple_phone(inext(inext(seg))->S("name")); + (*f)[NNBAD_DUR]=inext(inext(seg))->f_present("bad_dur"); } - else + else { - //cout << "NO RC\n"; - (*f)[RC]=0; - (*f)[NNBAD_DUR]=0; + //cout << "NO RC\n"; + (*f)[RC]=0; + (*f)[NNBAD_DUR]=0; } - // Next next seg syl features. - if(nnsyl) + // Next next seg syl features. + if(nnsyl) { - (*f)[NNSYL]=simple_id(nnsyl->S("id")); + (*f)[NNSYL]=simple_id(nnsyl->S("id")); } - else - (*f)[NNSYL]=0; + else + (*f)[NNSYL]=0; - // Prev seg syl feature - if(seg->prev()) + // Prev seg syl feature + if(iprev(seg)) { - (*f)[LC]=simple_phone(seg->prev()->S("name")); - (*f)[PBAD_DUR]=seg->prev()->f_present("bad_dur"); + (*f)[LC]=simple_phone(iprev(seg)->S("name")); + (*f)[PBAD_DUR]=iprev(seg)->f_present("bad_dur"); } - else + else { - (*f)[LC]=0; - (*f)[PBAD_DUR]=0; + (*f)[LC]=0; + (*f)[PBAD_DUR]=0; } - if(seg->prev() && (syl=tc_get_syl(seg->prev()))) - (*f)[PSYL]=simple_id(syl->S("id")); - else - (*f)[PSYL]=0; + if(iprev(seg) && (syl=tc_get_syl(iprev(seg)))) + (*f)[PSYL]=simple_id(syl->S("id")); + else + (*f)[PSYL]=0; - // seg word feature - if((word=tc_get_word(seg))) - (*f)[WQRD]=simple_id(word->S("id")); - else - (*f)[WQRD]=0; + // seg word feature + if((word=tc_get_word(seg))) + (*f)[WQRD]=simple_id(word->S("id")); + else + (*f)[WQRD]=0; - // Next seg word features - if((word=tc_get_word(seg->next()))) - (*f)[NWQRD]=simple_id(word->S("id")); - else - (*f)[NWQRD]=0; + // Next seg word features + if((word=tc_get_word(inext(seg)))) + (*f)[NWQRD]=simple_id(word->S("id")); + else + (*f)[NWQRD]=0; + + // next next seg word feature + if(inext(inext(seg)) && (word=tc_get_word(inext(inext(seg))))) + (*f)[NNWQRD]=simple_id(word->S("id")); + else + 
(*f)[NNWQRD]=0; + + // Prev seg word feature + if(iprev(seg) && (word=tc_get_word(iprev(seg)))) + (*f)[PWQRD]=simple_id(word->S("id")); + else + (*f)[PWQRD]=0; - // next next seg word feature - if(seg->next()->next() && (word=tc_get_word(seg->next()->next()))) - (*f)[NNWQRD]=simple_id(word->S("id")); - else - (*f)[NNWQRD]=0; - // Prev seg word feature - if(seg->prev() && (word=tc_get_word(seg->prev()))) - (*f)[PWQRD]=simple_id(word->S("id")); - else - (*f)[PWQRD]=0; - - - // segs sylpos - (*f)[SYLPOS]=0; // medial - if( f->a_no_check(SYL)!= f->a_no_check(NSYL) ) - (*f)[SYLPOS]=1; // inter - else if( f->a_no_check(SYL)!= f->a_no_check(PSYL) ) - (*f)[SYLPOS]=2; // initial - else if( f->a_no_check(NSYL) != f->a_no_check(NNSYL) ) - (*f)[SYLPOS]=3; // final - - // segs wordpos - (*f)[WQRDPOS]=0; // medial - if( f->a_no_check(WQRD)!= f->a_no_check(NWQRD) ) - (*f)[WQRDPOS]=1; // inter - else if( f->a_no_check(WQRD)!= f->a_no_check(PWQRD) ) - (*f)[WQRDPOS]=2; // initial - else if( f->a_no_check(NWQRD) != f->a_no_check(NNWQRD) ) - (*f)[WQRDPOS]=3; // final - - // pbreak - if ((word=tc_get_word(seg))) - { - if ( word->S("pbreak") == "NB" ) - (*f)[PBREAK]=0; - else if ( word->S("pbreak") == "B" ) - (*f)[PBREAK]=1; - else - (*f)[PBREAK]=2; + // segs sylpos + (*f)[SYLPOS]=0; // medial + if( f->a_no_check(SYL)!= f->a_no_check(NSYL) ) + (*f)[SYLPOS]=1; // inter + else if( f->a_no_check(SYL)!= f->a_no_check(PSYL) ) + (*f)[SYLPOS]=2; // initial + else if( f->a_no_check(NSYL) != f->a_no_check(NNSYL) ) + (*f)[SYLPOS]=3; // final + + // segs wordpos + (*f)[WQRDPOS]=0; // medial + if( f->a_no_check(WQRD)!= f->a_no_check(NWQRD) ) + (*f)[WQRDPOS]=1; // inter + else if( f->a_no_check(WQRD)!= f->a_no_check(PWQRD) ) + (*f)[WQRDPOS]=2; // initial + else if( f->a_no_check(NWQRD) != f->a_no_check(NNWQRD) ) + (*f)[WQRDPOS]=3; // final + + // pbreak + if ((word=tc_get_word(seg))) + { + if ( word->S("pbreak") == "NB" ) + (*f)[PBREAK]=0; + else if ( word->S("pbreak") == "B" ) + (*f)[PBREAK]=1; + 
else + (*f)[PBREAK]=2; } - else - (*f)[PBREAK]=-1; + else + (*f)[PBREAK]=-1; - // seg punc and pos - if((word=tc_get_word(seg))) + // seg punc and pos + if((word=tc_get_word(seg))) { - (*f)[POS]=simple_pos(word->S("pos")); - (*f)[PUNC]=simple_punc(parent(word,"Token")->S("punc","NONE")); + (*f)[POS]=simple_pos(word->S("pos")); + (*f)[PUNC]=simple_punc(parent(word,"Token")->S("punc","NONE")); } - else + else { - (*f)[POS]=-1; - (*f)[PUNC]=-1; + (*f)[POS]=-1; + (*f)[PUNC]=-1; } - // next seg punc and pos - if ((word=tc_get_word(seg->next()))) + // next seg punc and pos + if ((word=tc_get_word(inext(seg)))) { - (*f)[NPOS]=simple_pos(word->S("pos")); - (*f)[NPUNC]=simple_punc(parent(word,"Token")->S("punc","NONE")); + (*f)[NPOS]=simple_pos(word->S("pos")); + (*f)[NPUNC]=simple_punc(parent(word,"Token")->S("punc","NONE")); } - else + else { - (*f)[NPOS]=-1; - (*f)[NPUNC]=-1; + (*f)[NPOS]=-1; + (*f)[NPUNC]=-1; } - return f; - //seg->set_val("tcdata",est_val(f)); // copied? + return f; + //seg->set_val("tcdata",est_val(f)); // copied? } @@ -508,7 +508,7 @@ // the f0 (i.e. 
fv->a_no_check( fv->n()-1 ) ) EST_String left(seg->S("name")); - EST_String right(seg->next()->S("name")); + EST_String right(inext(seg)->S("name")); EST_FVector *fv = 0; int penalty = 0; @@ -523,12 +523,12 @@ penalty += 1; } - if( seg->next()->f_present("midcoef") && + if( inext(seg)->f_present("midcoef") && ( ph_is_vowel( right ) || ph_is_approximant( right ) || ph_is_liquid( right ) || ph_is_nasal( right ) ) ){ - fv = fvector( seg->next()->f("midcoef") ); + fv = fvector( inext(seg)->f("midcoef") ); if( fv->a_no_check(fv->n()-1) == -1.0 ) // means unvoiced penalty += 1; } diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_FlatTargetCost.h festival-2.5.0/src/modules/MultiSyn/EST_FlatTargetCost.h --- festival-2.4~release/src/modules/MultiSyn/EST_FlatTargetCost.h 2010-04-30 11:04:06.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_FlatTargetCost.h 2017-09-04 15:54:08.000000000 +0000 @@ -123,7 +123,7 @@ -#endif // __EST_TARGETCOST_H__ +#endif // __EST_TARGETCOST_FLAT_H__ diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_HybridTargetCost.cc festival-2.5.0/src/modules/MultiSyn/EST_HybridTargetCost.cc --- festival-2.4~release/src/modules/MultiSyn/EST_HybridTargetCost.cc 1970-01-01 00:00:00.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_HybridTargetCost.cc 2017-09-04 15:54:08.000000000 +0000 @@ -0,0 +1,169 @@ +/*************************************************************************/ +/* */ +/* Centre for Speech Technology Research */ +/* (University of Edinburgh, UK) and */ +/* Rob Clark */ +/* Copyright (c) 2015 */ +/* All Rights Reserved. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to use and distribute */ +/* this software and its documentation without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of this work, and to */ +/* permit persons to whom this work is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* 1. The code must retain the above copyright notice, this list of */ +/* conditions and the following disclaimer. */ +/* 2. Any modifications must be clearly marked as such. */ +/* 3. Original authors' names are not deleted. */ +/* 4. The authors' names are not used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ +/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ +/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT */ +/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ +/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ +/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ +/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ +/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ +/* THIS SOFTWARE. 
*/ +/* */ +/*************************************************************************/ +/* */ +/* Author: Rob Clark */ +/* Date: June 2015 */ +/* --------------------------------------------------------------------- */ +/* */ +/* */ +/* */ +/* */ +/*************************************************************************/ + + +#include "EST_HybridTargetCost.h" +#include "EST_FMatrix.h" +#include "ling_class/EST_Item.h" + + +static float kl_divergence(EST_FVector *a, EST_FVector *b); + +float EST_HybridTargetCost::operator()(const EST_Item* targ, const EST_Item* cand) const +{ + set_targ_and_cand(targ,cand); + score = 0.0; + weight_sum = 0.0; + + score += add_weight(50.0)*kl_features_cost(); + + return score / weight_sum; +} + + +float EST_HybridTargetCost::kl_features_cost() const +{ + /* In cases where there is a missing diphone and the current target is extended, there should be a feature: + targ or cand->features().val("extendLeft").String() [or extendRight] set, in which case 6 + comparisons should be made rather than 4. + */ + + // Feature names copied from DiphoneVoiceModule.cc + static const EST_String start_str("start"); + static const EST_String ll_str("target_ll"); + static const EST_String l_str("target_l"); + static const EST_String r_str("target_r"); + static const EST_String rr_str("target_rr"); + + EST_FVector *targv_l_r = NULL; + EST_FVector *targv_l_rr = NULL; + EST_FVector *targv_r_ll = NULL; + EST_FVector *targv_r_l = NULL; + + // First get the Fvectors. 
+ /* This is complicated by the fact that added interword silences won't have features */ + if( targ->features().present(r_str)) { + targv_l_r = fvector(targ->features().val( r_str )); + targv_l_rr = fvector(targ->features().val( rr_str )); + } + + if( inext(targ)->features().present(ll_str)) { + targv_r_ll = fvector(inext(targ)->features().val( ll_str )); + targv_r_l = fvector(inext(targ)->features().val( l_str )); + } + + EST_FVector *candv_l_r = fvector(cand->features().val( r_str )); + EST_FVector *candv_l_rr = fvector(cand->features().val( rr_str )); + EST_FVector *candv_r_ll = fvector(inext(cand)->features().val( ll_str )); + EST_FVector *candv_r_l = fvector(inext(cand)->features().val( l_str )); + + return 0.25 * (kl_divergence(targv_l_r,candv_l_r) + kl_divergence(targv_l_rr,candv_l_rr) + + kl_divergence(targv_r_ll,candv_r_ll) + kl_divergence(targv_r_l,candv_r_l)); +} + + + // Symmetric KL divergence for vectors where first n/2 elements are the means + // and the second n/2 are the diagonal covariances for Gaussians + float kl_divergence(EST_FVector *a, EST_FVector *b) + { + + // If the target has no features (inserted interword silence) then just return 0.0 + if (!a) + return 0.0; + + int l1 = a->length(); + int l2 = b->length(); + + if (l1 != l2 ) { + cout << "kl_divergence vector length error: (" << l1 << " , " << l2 << " )" << endl; + return 1.0; + } + + // First extract means and stddevs. 
+ EST_FVector mean_a; + EST_FVector mean_b; + EST_FVector std_a; + EST_FVector std_b; + + a->sub_vector(mean_a, 0, l1/2); + b->sub_vector(mean_b, 0, l2/2); + a->sub_vector(std_a, l1/2, l1/2); + b->sub_vector(std_b, l2/2, l2/2); // second half of b (was mistakenly taken from a) + + // Calculate the individual terms of the KL divergence + // each way around for symmetry + float trace_term1 = 0.0; + float trace_term2 = 0.0; + float sq_tmp; + float mahalanobis_term1 = 0.0; + float mahalanobis_term2 = 0.0; + float det_term1 = 1.0; + float det_term2 = 1.0; + + + for (int i = 0 ; i < std_a.length() ; i++) { + // trace term reduces to an elementwise sum of ratios of stddevs. + trace_term1 += std_a.a(i) / std_b.a(i); + trace_term2 += std_b.a(i) / std_a.a(i); + // The Mahalanobis term reduces to the elementwise sum of the squared mean + // differences divided by the stddevs. + sq_tmp = (mean_a.a(i) - mean_b.a(i)) * (mean_a.a(i) - mean_b.a(i)); + mahalanobis_term1 += sq_tmp / std_b.a(i); + mahalanobis_term2 += sq_tmp / std_a.a(i); + // The ratio of determinants reduced to the product of elementwise stddev ratios. + det_term1 *= std_b.a(i) / std_a.a(i); + det_term2 *= std_a.a(i) / std_b.a(i); + } + + // Return the average of non-symmetric kl divergences + return 0.25 * ( trace_term1 + mahalanobis_term1 - std_a.length() + log(det_term1) + + trace_term2 + mahalanobis_term2 - std_b.length() + log(det_term2) ) ; + + } + + + + + + diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_HybridTargetCost.h festival-2.5.0/src/modules/MultiSyn/EST_HybridTargetCost.h --- festival-2.4~release/src/modules/MultiSyn/EST_HybridTargetCost.h 1970-01-01 00:00:00.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_HybridTargetCost.h 2017-09-04 15:54:08.000000000 +0000 @@ -0,0 +1,72 @@ +/*************************************************************************/ +/* */ +/* Centre for Speech Technology Research */ +/* (University of Edinburgh, UK) and */ +/* Rob Clark */ +/* Copyright (c) 2015 */ +/* All Rights Reserved. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to use and distribute */ +/* this software and its documentation without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of this work, and to */ +/* permit persons to whom this work is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* 1. The code must retain the above copyright notice, this list of */ +/* conditions and the following disclaimer. */ +/* 2. Any modifications must be clearly marked as such. */ +/* 3. Original authors' names are not deleted. */ +/* 4. The authors' names are not used to endorse or promote products */ +/* derived from this software without specific prior written */ +/* permission. */ +/* */ +/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */ +/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ +/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT */ +/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */ +/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ +/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ +/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ +/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ +/* THIS SOFTWARE. 
*/ +/* */ +/*************************************************************************/ +/* */ +/* Author: Rob Clark */ +/* Date: June 2015 */ +/* --------------------------------------------------------------------- */ +/* */ +/* */ +/* */ +/* */ +/*************************************************************************/ + + +#ifndef __EST_TARGETCOST_HYBRID_H__ +#define __EST_TARGETCOST_HYBRID_H__ + +#include "EST_TargetCost.h" + + +/* + * DERIVED CLASS: EST_HybridTargetCost + */ +class EST_HybridTargetCost : public EST_TargetCost { + + + private: + float kl_features_cost() const; + + + public: + float operator()(const EST_Item* targ, const EST_Item* cand) const; + +}; + + +#endif // __EST_TARGETCOST_HYBRID_H__ + + + + diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_JoinCost.h festival-2.5.0/src/modules/MultiSyn/EST_JoinCost.h --- festival-2.4~release/src/modules/MultiSyn/EST_JoinCost.h 2007-04-23 14:47:40.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_JoinCost.h 2017-09-04 15:54:08.000000000 +0000 @@ -131,7 +131,7 @@ //default zero cost if units contiguous in database // (i.e. this is the cost between a phone and *itself* - if( left == right->prev() ) + if( left == iprev(right) ) return 0.0; // An "infinite" join cost for bad units. The idea here is that if @@ -200,7 +200,7 @@ //default zero cost if units contiguous in database // (i.e. this is the cost between a phone and *itself* - if( left->ph1->next() == right->ph1 ) + if( inext(left->ph1) == right->ph1 ) return 0.0; //use cached costs in preference to calculating diff -Nru festival-2.4~release/src/modules/MultiSyn/EST_TargetCost.cc festival-2.5.0/src/modules/MultiSyn/EST_TargetCost.cc --- festival-2.4~release/src/modules/MultiSyn/EST_TargetCost.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/EST_TargetCost.cc 2017-09-04 15:54:08.000000000 +0000 @@ -69,373 +69,372 @@ // This is really designed only for apml! 
float EST_TargetCost::apml_accent_cost() const { - // Check if target is an apml utterance. If not return 0 as we don't - // trust its accent specification. + // Check if target is an apml utterance. If not return 0 as we don't + // trust its accent specification. - if( !tc_get_utt(targ)->relation_present("SemStructure")) - return 0.0; + if( !tc_get_utt(targ)->relation_present("SemStructure")) + return 0.0; - // Check if candidate is an apml utterance. If not return 1 - // (as we want to use apml if available) + // Check if candidate is an apml utterance. If not return 1 + // (as we want to use apml if available) - if( !tc_get_utt(cand)->relation_present("SemStructure")) - return 1.0; + if( !tc_get_utt(cand)->relation_present("SemStructure")) + return 1.0; - // As they are both apml match accents. + // As they are both apml match accents. - const EST_Item *tsyl, *csyl; - EST_String targ_accent, cand_accent, targ_boundary, cand_boundary; + const EST_Item *tsyl, *csyl; + EST_String targ_accent, cand_accent, targ_boundary, cand_boundary; - if( ph_is_vowel(targ->features().val("name").String()) && - !ph_is_silence(targ->features().val("name").String()) ) + if( ph_is_vowel(targ->features().val("name").String()) && + !ph_is_silence(targ->features().val("name").String()) ) { - tsyl = tc_get_syl(targ); - csyl = tc_get_syl(cand); + tsyl = tc_get_syl(targ); + csyl = tc_get_syl(cand); - // Can't assume candidate and target identities are the same - // (because of backoff to a silence for example) - if( csyl == 0 ) - return 1.0; - - targ_accent = ff_tobi_accent(tsyl); - cand_accent = ff_tobi_accent(csyl); - targ_boundary = ff_tobi_endtone(tsyl); - cand_boundary = ff_tobi_endtone(csyl); + // Can't assume candidate and target identities are the same + // (because of backoff to a silence for example) + if( csyl == 0 ) + return 1.0; + + targ_accent = ff_tobi_accent(tsyl); + cand_accent = ff_tobi_accent(csyl); + targ_boundary = ff_tobi_endtone(tsyl); + cand_boundary = 
ff_tobi_endtone(csyl); - if( (cand_accent != targ_accent) || (cand_boundary != targ_boundary) ) - return 1.0; + if( (cand_accent != targ_accent) || (cand_boundary != targ_boundary) ) + return 1.0; } - if( ph_is_vowel(targ->next()->features().val("name").String()) && - !ph_is_silence(targ->next()->features().val("name").String()) ) + if( ph_is_vowel(inext(targ)->features().val("name").String()) && + !ph_is_silence(inext(targ)->features().val("name").String()) ) { - tsyl = tc_get_syl(targ->next()); - csyl = tc_get_syl(cand->next()); + tsyl = tc_get_syl(inext(targ)); + csyl = tc_get_syl(inext(cand)); - // Can't assume candidate and target identities are the same - // (because of backoff to a silence for example) - if( csyl == 0 ) - return 1.0; - - targ_accent = ff_tobi_accent(tsyl); - cand_accent = ff_tobi_accent(csyl); - targ_boundary = ff_tobi_endtone(tsyl); - cand_boundary = ff_tobi_endtone(csyl); + // Can't assume candidate and target identities are the same + // (because of backoff to a silence for example) + if( csyl == 0 ) + return 1.0; + + targ_accent = ff_tobi_accent(tsyl); + cand_accent = ff_tobi_accent(csyl); + targ_boundary = ff_tobi_endtone(tsyl); + cand_boundary = ff_tobi_endtone(csyl); - if( (cand_accent != targ_accent) || (cand_boundary != targ_boundary) ) - return 1.0; + if( (cand_accent != targ_accent) || (cand_boundary != targ_boundary) ) + return 1.0; } - return 0.0; + return 0.0; } float EST_TargetCost::stress_cost() const { - int cand_stress; - int targ_stress; - const EST_Item *tsyl, *csyl; + int cand_stress; + int targ_stress; + const EST_Item *tsyl, *csyl; - if( ph_is_vowel(targ->features().val("name").String()) && - !ph_is_silence(targ->features().val("name").String()) ) + if( ph_is_vowel(targ->features().val("name").String()) && + !ph_is_silence(targ->features().val("name").String()) ) { - tsyl = tc_get_syl(targ); - csyl = tc_get_syl(cand); + tsyl = tc_get_syl(targ); + csyl = tc_get_syl(cand); - // Can't assume candidate and target 
identities are the same - // (because of backoff to a silence for example) - if( csyl == 0 ) + // Can't assume candidate and target identities are the same + // (because of backoff to a silence for example) + if( csyl == 0 ) { - //cout << "SC: 1 returning 1\n"; - return 1; + //cout << "SC: 1 returning 1\n"; + return 1; } - targ_stress = (tsyl->I("stress") > 0) ? 1 : 0; - cand_stress = (csyl->I("stress") > 0) ? 1 : 0; + targ_stress = (tsyl->I("stress") > 0) ? 1 : 0; + cand_stress = (csyl->I("stress") > 0) ? 1 : 0; - if( cand_stress != targ_stress) + if( cand_stress != targ_stress) { - //cout << "SC: 2 returning 1\n"; - return 1; + //cout << "SC: 2 returning 1\n"; + return 1; } } - if( ph_is_vowel(targ->next()->features().val("name").String()) && - !ph_is_silence(targ->next()->features().val("name").String()) ) + if( ph_is_vowel(inext(targ)->features().val("name").String()) && + !ph_is_silence(inext(targ)->features().val("name").String()) ) { - tsyl = tc_get_syl(targ->next()); - csyl = tc_get_syl(cand->next()); + tsyl = tc_get_syl(inext(targ)); + csyl = tc_get_syl(inext(cand)); - // Can't assume candidate and target identities are the same - // (because of backoff to a silence for example) - if( csyl == 0 ) + // Can't assume candidate and target identities are the same + // (because of backoff to a silence for example) + if( csyl == 0 ) { - //cout << "SC: 3 returning 1\n"; - return 1; + //cout << "SC: 3 returning 1\n"; + return 1; } - targ_stress = (tsyl->I("stress") > 0) ? 1 : 0; - cand_stress = (csyl->I("stress") > 0) ? 1 : 0; - if( cand_stress != targ_stress) + targ_stress = (tsyl->I("stress") > 0) ? 1 : 0; + cand_stress = (csyl->I("stress") > 0) ? 
1 : 0; + if( cand_stress != targ_stress) { - //cout << "SC: 4 returning 1\n"; - return 1; + //cout << "SC: 4 returning 1\n"; + return 1; } } - //cout << "SC: 5 returning 0\n"; - return 0; + //cout << "SC: 5 returning 0\n"; + return 0; } float EST_TargetCost::position_in_syllable_cost() const { - tcpos_t targ_pos = TCPOS_MEDIAL; - tcpos_t cand_pos = TCPOS_MEDIAL; + tcpos_t targ_pos = TCPOS_MEDIAL; + tcpos_t cand_pos = TCPOS_MEDIAL; - const EST_Item *targ_syl = tc_get_syl(targ); - const EST_Item *targ_next_syl = tc_get_syl(targ->next()); - const EST_Item *targ_next_next_syl = tc_get_syl(targ->next()->next()); - const EST_Item *targ_prev_syl = tc_get_syl(targ->prev()); - const EST_Item *cand_syl = tc_get_syl(cand); - const EST_Item *cand_next_syl = tc_get_syl(cand->next()); - const EST_Item *cand_next_next_syl = tc_get_syl(cand->next()->next()); - const EST_Item *cand_prev_syl = tc_get_syl(cand->prev()); + const EST_Item *targ_syl = tc_get_syl(targ); + const EST_Item *targ_next_syl = tc_get_syl(inext(targ)); + const EST_Item *targ_next_next_syl = tc_get_syl(inext(inext(targ))); + const EST_Item *targ_prev_syl = tc_get_syl(iprev(targ)); + const EST_Item *cand_syl = tc_get_syl(cand); + const EST_Item *cand_next_syl = tc_get_syl(inext(cand)); + const EST_Item *cand_next_next_syl = tc_get_syl(inext(inext(cand))); + const EST_Item *cand_prev_syl = tc_get_syl(iprev(cand)); - if( targ_syl != targ_next_syl ) - targ_pos = TCPOS_INTER; - else if( targ_syl != targ_prev_syl) - targ_pos = TCPOS_INITIAL; - else if( targ_next_syl != targ_next_next_syl) - targ_pos = TCPOS_FINAL; + if( targ_syl != targ_next_syl ) + targ_pos = TCPOS_INTER; + else if( targ_syl != targ_prev_syl) + targ_pos = TCPOS_INITIAL; + else if( targ_next_syl != targ_next_next_syl) + targ_pos = TCPOS_FINAL; - if( cand_syl != cand_next_syl ) - cand_pos = TCPOS_INTER; - else if( cand_syl != cand_prev_syl) - cand_pos = TCPOS_INITIAL; - else if( cand_next_syl != cand_next_next_syl) - cand_pos = TCPOS_FINAL; + if( 
cand_syl != cand_next_syl ) + cand_pos = TCPOS_INTER; + else if( cand_syl != cand_prev_syl) + cand_pos = TCPOS_INITIAL; + else if( cand_next_syl != cand_next_next_syl) + cand_pos = TCPOS_FINAL; - return (targ_pos == cand_pos) ? 0 : 1; + return (targ_pos == cand_pos) ? 0 : 1; } float EST_TargetCost::position_in_word_cost() const { - tcpos_t targ_pos = TCPOS_MEDIAL; - tcpos_t cand_pos = TCPOS_MEDIAL; + tcpos_t targ_pos = TCPOS_MEDIAL; + tcpos_t cand_pos = TCPOS_MEDIAL; - const EST_Item *targ_word = tc_get_word(targ); - const EST_Item *targ_next_word = tc_get_word(targ->next()); - const EST_Item *targ_next_next_word = tc_get_word(targ->next()->next()); - const EST_Item *targ_prev_word = tc_get_word(targ->prev()); - const EST_Item *cand_word = tc_get_word(cand); - const EST_Item *cand_next_word = tc_get_word(cand->next()); - const EST_Item *cand_next_next_word = tc_get_word(cand->next()->next()); - const EST_Item *cand_prev_word = tc_get_word(cand->prev()); - - if( targ_word != targ_next_word ) - targ_pos = TCPOS_INTER; - else if( targ_word != targ_prev_word) - targ_pos = TCPOS_INITIAL; - else if( targ_next_word != targ_next_next_word) - targ_pos = TCPOS_FINAL; - - if( cand_word != cand_next_word ) - cand_pos = TCPOS_INTER; - else if( cand_word != cand_prev_word) - cand_pos = TCPOS_INITIAL; - else if( cand_next_word != cand_next_next_word) - cand_pos = TCPOS_FINAL; + const EST_Item *targ_word = tc_get_word(targ); + const EST_Item *targ_next_word = tc_get_word(inext(targ)); + const EST_Item *targ_next_next_word = tc_get_word(inext(inext(targ))); + const EST_Item *targ_prev_word = tc_get_word(iprev(targ)); + const EST_Item *cand_word = tc_get_word(cand); + const EST_Item *cand_next_word = tc_get_word(inext(cand)); + const EST_Item *cand_next_next_word = tc_get_word(inext(inext(cand))); + const EST_Item *cand_prev_word = tc_get_word(iprev(cand)); + + if( targ_word != targ_next_word ) + targ_pos = TCPOS_INTER; + else if( targ_word != targ_prev_word) + targ_pos = 
TCPOS_INITIAL; + else if( targ_next_word != targ_next_next_word) + targ_pos = TCPOS_FINAL; + + if( cand_word != cand_next_word ) + cand_pos = TCPOS_INTER; + else if( cand_word != cand_prev_word) + cand_pos = TCPOS_INITIAL; + else if( cand_next_word != cand_next_next_word) + cand_pos = TCPOS_FINAL; - return (targ_pos == cand_pos) ? 0 : 1; + return (targ_pos == cand_pos) ? 0 : 1; } float EST_TargetCost::position_in_phrase_cost() const { - const EST_Item *targ_word = tc_get_word(targ); - const EST_Item *cand_word = tc_get_word(cand); + const EST_Item *targ_word = tc_get_word(targ); + const EST_Item *cand_word = tc_get_word(cand); - if (!targ_word && !cand_word) - return 0; - if (!targ_word || !cand_word) - return 1; + if (!targ_word && !cand_word) + return 0; + if (!targ_word || !cand_word) + return 1; - return (targ_word->features().val("pbreak").String() == cand_word->features().val("pbreak").String()) ? 0 : 1; + return (targ_word->features().val("pbreak").String() == cand_word->features().val("pbreak").String()) ? 
0 : 1; } float EST_TargetCost::punctuation_cost() const { - - const EST_Item *targ_word = tc_get_word(targ); - const EST_Item *cand_word = tc_get_word(cand); - const EST_Item *next_targ_word = tc_get_word(targ->next()); - const EST_Item *next_cand_word = tc_get_word(cand->next()); - - float score = 0.0; - - if ( (targ_word && !cand_word) || (!targ_word && cand_word) ) - score += 0.5; - else - if (targ_word && cand_word) - if ( parent(targ_word,"Token")->features().val("punc","NONE").String() - != parent(cand_word,"Token")->features().val("punc","NONE").String() ) - score += 0.5; - - - if ( (next_targ_word && !next_cand_word) || (!next_targ_word && next_cand_word) ) - score += 0.5; - else - if(next_targ_word && next_cand_word) - if ( parent(next_targ_word,"Token")->features().val("punc","NONE").String() - != parent(next_cand_word,"Token")->features().val("punc","NONE").String() ) - score += 0.5; + const EST_Item *targ_word = tc_get_word(targ); + const EST_Item *cand_word = tc_get_word(cand); + const EST_Item *next_targ_word = tc_get_word(inext(targ)); + const EST_Item *next_cand_word = tc_get_word(inext(cand)); + + float score = 0.0; + + if ( (targ_word && !cand_word) || (!targ_word && cand_word) ) + score += 0.5; + else + if (targ_word && cand_word) + if ( parent(targ_word,"Token")->features().val("punc","NONE").String() + != parent(cand_word,"Token")->features().val("punc","NONE").String() ) + score += 0.5; + + + if ( (next_targ_word && !next_cand_word) || (!next_targ_word && next_cand_word) ) + score += 0.5; + else + if(next_targ_word && next_cand_word) + if ( parent(next_targ_word,"Token")->features().val("punc","NONE").String() + != parent(next_cand_word,"Token")->features().val("punc","NONE").String() ) + score += 0.5; - return score; + return score; } float EST_TargetCost::partofspeech_cost() const { - // Compare left phone half of diphone - const EST_Item *targ_left_word = tc_get_word(targ); - const EST_Item *cand_left_word = tc_get_word(cand); - - 
if(!targ_left_word && !cand_left_word) - return 0; - if(!targ_left_word || !cand_left_word) - return 1; + // Compare left phone half of diphone + const EST_Item *targ_left_word = tc_get_word(targ); + const EST_Item *cand_left_word = tc_get_word(cand); + + if(!targ_left_word && !cand_left_word) + return 0; + if(!targ_left_word || !cand_left_word) + return 1; + + const EST_String targ_left_pos( simple_pos(targ_left_word->features().val("pos").String()) ); + const EST_String cand_left_pos( simple_pos(cand_left_word->features().val("pos").String()) ); + + if( targ_left_pos != cand_left_pos ) + return 1; + + // Compare right phone half of diphone + const EST_Item *targ_right_word = tc_get_word(inext(targ)); + const EST_Item *cand_right_word = tc_get_word(inext(cand)); + + if(!targ_right_word && !cand_right_word) + return 0; + if(!targ_right_word || !cand_right_word) + return 1; - const EST_String targ_left_pos( simple_pos(targ_left_word->features().val("pos").String()) ); - const EST_String cand_left_pos( simple_pos(cand_left_word->features().val("pos").String()) ); + const EST_String targ_right_pos( simple_pos(targ_right_word->features().val("pos").String()) ); + const EST_String cand_right_pos( simple_pos(cand_right_word->features().val("pos").String()) ); - if( targ_left_pos != cand_left_pos ) - return 1; + if( targ_right_pos != cand_right_pos ) + return 1; - // Compare right phone half of diphone - const EST_Item *targ_right_word = tc_get_word(targ->next()); - const EST_Item *cand_right_word = tc_get_word(cand->next()); - - if(!targ_right_word && !cand_right_word) return 0; - if(!targ_right_word || !cand_right_word) - return 1; - - const EST_String targ_right_pos( simple_pos(targ_right_word->features().val("pos").String()) ); - const EST_String cand_right_pos( simple_pos(cand_right_word->features().val("pos").String()) ); - - if( targ_right_pos != cand_right_pos ) - return 1; - - return 0; } float EST_TargetCost::left_context_cost() const { + EST_Item *targ_context 
= iprev(targ); + EST_Item *cand_context = iprev(cand); - EST_Item *targ_context = targ->prev(); - EST_Item *cand_context = cand->prev(); - - if ( !targ_context && !cand_context) - return 0; - if ( !targ_context || !cand_context) - return 1; + if ( !targ_context && !cand_context) + return 0; + if ( !targ_context || !cand_context) + return 1; - return (targ_context->features().val("name").String() == cand_context->features().val("name").String()) ? 0 : 1; + return (targ_context->features().val("name").String() == cand_context->features().val("name").String()) ? 0 : 1; } float EST_TargetCost::right_context_cost() const { - EST_Item *targ_context = targ->next()->next(); - EST_Item *cand_context = cand->next()->next(); + EST_Item *targ_context = inext(inext(targ)); + EST_Item *cand_context = inext(inext(cand)); - if ( !targ_context && !cand_context) - return 0; - if ( !targ_context || !cand_context) - return 1; + if ( !targ_context && !cand_context) + return 0; + if ( !targ_context || !cand_context) + return 1; - return (targ_context->features().val("name").String() == cand_context->features().val("name").String()) ? 0 : 1; + return (targ_context->features().val("name").String() == cand_context->features().val("name").String()) ? 0 : 1; } float EST_TargetCost::out_of_lex_cost() const { - static const EST_String ool_feat("bad_lex"); + static const EST_String ool_feat("bad_lex"); - // bad_dur may at some stage be set on a target for resynthesis purposes. - if( cand->f_present(ool_feat) - != targ->f_present(ool_feat) ) - return 1.0; - - if( cand->next()->f_present(ool_feat) - != targ->next()->f_present(ool_feat) ) - return 1.0; + // bad_dur may at some stage be set on a target for resynthesis purposes. 
+ if( cand->f_present(ool_feat) + != targ->f_present(ool_feat) ) + return 1.0; + + if( inext(cand)->f_present(ool_feat) + != inext(targ)->f_present(ool_feat) ) + return 1.0; - return 0.0; + return 0.0; } float EST_TargetCost::bad_duration_cost() const { - static const EST_String bad_dur_feat("bad_dur"); + static const EST_String bad_dur_feat("bad_dur"); - // bad_dur may at some stage be set on a target for resynthesis purposes. - if( cand->f_present(bad_dur_feat) - != targ->f_present(bad_dur_feat) ) - return 1.0; - - if( cand->next()->f_present(bad_dur_feat) - != targ->next()->f_present(bad_dur_feat) ) - return 1.0; - // If the segments next to these segments are bad, then these ones are probably wrong too! - if( cand->prev() && targ->prev() && ( cand->prev()->f_present(bad_dur_feat) - != targ->prev()->f_present(bad_dur_feat) ) ) - return 1.0; - - if( cand->next()->next() && targ->next()->next() && ( cand->next()->next()->f_present(bad_dur_feat) - != targ->next()->next()->f_present(bad_dur_feat) ) ) - return 1.0; + // bad_dur may at some stage be set on a target for resynthesis purposes. + if( cand->f_present(bad_dur_feat) + != targ->f_present(bad_dur_feat) ) + return 1.0; + + if( inext(cand)->f_present(bad_dur_feat) + != inext(targ)->f_present(bad_dur_feat) ) + return 1.0; + // If the segments next to these segments are bad, then these ones are probably wrong too! + if( iprev(cand) && iprev(targ) && ( iprev(cand)->f_present(bad_dur_feat) + != iprev(targ)->f_present(bad_dur_feat) ) ) + return 1.0; + + if( inext(inext(cand)) && inext(inext(targ)) && + ( inext(inext(cand))->f_present(bad_dur_feat) + != inext(inext(targ))->f_present(bad_dur_feat) ) ) + return 1.0; - return 0.0; + return 0.0; } float EST_TargetCost::bad_f0_cost() const { - // by default, the last element of join cost coef vector is - // the f0 (i.e. fv->a_no_check( fv->n()-1 ) ) + // by default, the last element of join cost coef vector is + // the f0 (i.e. 
fv->a_no_check( fv->n()-1 ) ) - const EST_Item *cand_left = cand; - const EST_Item *cand_right = cand_left->next(); + const EST_Item *cand_left = cand; + const EST_Item *cand_right = inext(cand_left); - const EST_String &left_phone( cand_left->features().val("name").String() ); - const EST_String &right_phone( cand_right->features().val("name").String() ); + const EST_String &left_phone( cand_left->features().val("name").String() ); + const EST_String &right_phone( cand_right->features().val("name").String() ); - EST_FVector *fv = 0; - float penalty = 0.0; - - if( ph_is_vowel( left_phone ) - || ph_is_approximant( left_phone ) - || ph_is_liquid( left_phone ) - || ph_is_nasal( left_phone ) ){ - fv = fvector( cand_left->f("midcoef") ); - if( fv->a_no_check(fv->n()-1) == -1.0 ) // means unvoiced - penalty += 0.5; - } - - if( ph_is_vowel( right_phone ) - || ph_is_approximant( right_phone ) - || ph_is_liquid( right_phone ) - || ph_is_nasal( right_phone ) ){ - fv = fvector( cand_right->f("midcoef") ); - if( fv->a_no_check(fv->n()-1) == -1.0 ) // means unvoiced - penalty += 0.5; - } + EST_FVector *fv = 0; + float penalty = 0.0; + + if( ph_is_vowel( left_phone ) + || ph_is_approximant( left_phone ) + || ph_is_liquid( left_phone ) + || ph_is_nasal( left_phone ) ){ + fv = fvector( cand_left->f("midcoef") ); + if( fv->a_no_check(fv->n()-1) == -1.0 ) // means unvoiced + penalty += 0.5; + } + + if( ph_is_vowel( right_phone ) + || ph_is_approximant( right_phone ) + || ph_is_liquid( right_phone ) + || ph_is_nasal( right_phone ) ){ + fv = fvector( cand_right->f("midcoef") ); + if( fv->a_no_check(fv->n()-1) == -1.0 ) // means unvoiced + penalty += 0.5; + } - return penalty; + return penalty; } @@ -448,27 +447,27 @@ float EST_DefaultTargetCost::operator()(const EST_Item* targ, const EST_Item* cand) const { - set_targ_and_cand(targ,cand); - score = 0.0; - weight_sum = 0.0; - - score += add_weight(10.0)*stress_cost(); - score += add_weight(5.0)*position_in_syllable_cost(); - score += 
add_weight(5.0)*position_in_word_cost(); - score += add_weight(6.0)*partofspeech_cost(); - score += add_weight(15.0)*position_in_phrase_cost(); - score += add_weight(4.0)*left_context_cost(); - score += add_weight(3.0)*right_context_cost(); - - score /= weight_sum; - - // These are considered really bad, and will result in a score > 1. - score += 10.0*bad_duration_cost(); // see also join cost. - score += 10.0*bad_f0_cost(); - score += 10.0*punctuation_cost(); - score += 10.0*out_of_lex_cost(); + set_targ_and_cand(targ,cand); + score = 0.0; + weight_sum = 0.0; + + score += add_weight(10.0)*stress_cost(); + score += add_weight(5.0)*position_in_syllable_cost(); + score += add_weight(5.0)*position_in_word_cost(); + score += add_weight(6.0)*partofspeech_cost(); + score += add_weight(15.0)*position_in_phrase_cost(); + score += add_weight(4.0)*left_context_cost(); + score += add_weight(3.0)*right_context_cost(); + + score /= weight_sum; + + // These are considered really bad, and will result in a score > 1. + score += 10.0*bad_duration_cost(); // see also join cost. + score += 10.0*bad_f0_cost(); + score += 10.0*punctuation_cost(); + score += 10.0*out_of_lex_cost(); - return score ; + return score ; } /* @@ -478,27 +477,27 @@ float EST_APMLTargetCost::operator()(const EST_Item* targ, const EST_Item* cand) const { - set_targ_and_cand(targ,cand); - score = 0.0; - weight_sum = 0.0; - - score += add_weight(10.0)*stress_cost(); - score += add_weight(20.0)*apml_accent_cost(); // APML only! - score += add_weight(5.0)*position_in_syllable_cost(); - score += add_weight(5.0)*position_in_word_cost(); - score += add_weight(6.0)*partofspeech_cost(); - score += add_weight(4.0)*position_in_phrase_cost(); - score += add_weight(4.0)*left_context_cost(); - score += add_weight(3.0)*right_context_cost(); - - score /= weight_sum; - - score += 10.0*bad_duration_cost(); // see also join cost. 
- score += 10.0*bad_f0_cost(); - score += 10.0*punctuation_cost(); - score += 10.0*out_of_lex_cost(); + set_targ_and_cand(targ,cand); + score = 0.0; + weight_sum = 0.0; + + score += add_weight(10.0)*stress_cost(); + score += add_weight(20.0)*apml_accent_cost(); // APML only! + score += add_weight(5.0)*position_in_syllable_cost(); + score += add_weight(5.0)*position_in_word_cost(); + score += add_weight(6.0)*partofspeech_cost(); + score += add_weight(4.0)*position_in_phrase_cost(); + score += add_weight(4.0)*left_context_cost(); + score += add_weight(3.0)*right_context_cost(); + + score /= weight_sum; + + score += 10.0*bad_duration_cost(); // see also join cost. + score += 10.0*bad_f0_cost(); + score += 10.0*punctuation_cost(); + score += 10.0*out_of_lex_cost(); - return score; + return score; } @@ -512,141 +511,139 @@ float EST_SingingTargetCost::pitch_cost() const { + const EST_Item *targ_word = tc_get_word(targ); + const EST_Item *cand_word = tc_get_word(cand); + const EST_Item *next_targ_word = tc_get_word(inext(targ)); + const EST_Item *next_cand_word = tc_get_word(inext(cand)); + const float threshold = 0.1; + float targ_pitch, cand_pitch; + LISP l_tmp; - const EST_Item *targ_word = tc_get_word(targ); - const EST_Item *cand_word = tc_get_word(cand); - const EST_Item *next_targ_word = tc_get_word(targ->next()); - const EST_Item *next_cand_word = tc_get_word(cand->next()); - const float threshold = 0.1; - float targ_pitch, cand_pitch; - LISP l_tmp; + float score = 0.0; - float score = 0.0; - - if ( (targ_word && !cand_word) || (!targ_word && cand_word) ) + if ( (targ_word && !cand_word) || (!targ_word && cand_word) ) { - cout << "PITCH PENALTY WORD NON-WORD MISMATCH\n"; - score += 0.5; + cout << "PITCH PENALTY WORD NON-WORD MISMATCH\n"; + score += 0.5; } - else - if (targ_word && cand_word) - { + else + if (targ_word && cand_word) + { - l_tmp = lisp_val(parent(targ_word,"Token")->f("freq",est_val(0))); + l_tmp = 
lisp_val(parent(targ_word,"Token")->f("freq",est_val(0))); - // This currently assumes one syllable words, need to process - // the list more for multiple syllable words, or move the info - // to the syllable. - if(CONSP(l_tmp)) - targ_pitch = get_c_float(car(l_tmp)); - else - targ_pitch = get_c_float(l_tmp); - cand_pitch = parent(cand_word,"Token")->F("freq",0.0); - - if ( ! threshold_equal(targ_pitch,cand_pitch,threshold)) - { - cout << "PP: " << targ_pitch << " " << cand_pitch << endl; - score += 0.5; - } - } + // This currently assumes one syllable words, need to process + // the list more for multiple syllable words, or move the info + // to the syllable. + if(CONSP(l_tmp)) + targ_pitch = get_c_float(car(l_tmp)); + else + targ_pitch = get_c_float(l_tmp); + cand_pitch = parent(cand_word,"Token")->F("freq",0.0); + + if ( ! threshold_equal(targ_pitch,cand_pitch,threshold)) + { + cout << "PP: " << targ_pitch << " " << cand_pitch << endl; + score += 0.5; + } + } - if ( (next_targ_word && !next_cand_word) || (!next_targ_word && next_cand_word) ) + if ( (next_targ_word && !next_cand_word) || (!next_targ_word && next_cand_word) ) { - cout << "PITCH PENALTY NEXT WORD NON-WORD MISMATCH\n"; - score += 0.5; + cout << "PITCH PENALTY NEXT WORD NON-WORD MISMATCH\n"; + score += 0.5; } - else - if(next_targ_word && next_cand_word) - { - l_tmp = lisp_val(parent(next_targ_word,"Token")->f("freq",est_val(0))); - if(CONSP(l_tmp)) - targ_pitch = get_c_float(car(l_tmp)); - else - targ_pitch = get_c_float(l_tmp); - cand_pitch = parent(next_cand_word,"Token")->F("freq",0.0); - - if ( ! 
threshold_equal(targ_pitch,cand_pitch,threshold)) - { - cout << "NP: " << targ_pitch << " " << cand_pitch << endl; - score += 0.5; - } - } + else + if(next_targ_word && next_cand_word) + { + l_tmp = lisp_val(parent(next_targ_word,"Token")->f("freq",est_val(0))); + if(CONSP(l_tmp)) + targ_pitch = get_c_float(car(l_tmp)); + else + targ_pitch = get_c_float(l_tmp); + cand_pitch = parent(next_cand_word,"Token")->F("freq",0.0); + + if ( ! threshold_equal(targ_pitch,cand_pitch,threshold)) + { + cout << "NP: " << targ_pitch << " " << cand_pitch << endl; + score += 0.5; + } + } - if (score == 0.0) - cout << "NO PITCH PENALTY\n"; + if (score == 0.0) + cout << "NO PITCH PENALTY\n"; - return score; + return score; } float EST_SingingTargetCost::duration_cost() const { - - const EST_Item *targ_word = tc_get_word(targ); - const EST_Item *cand_word = tc_get_word(cand); - const EST_Item *next_targ_word = tc_get_word(targ->next()); - const EST_Item *next_cand_word = tc_get_word(cand->next()); - float targ_dur, cand_dur; - LISP l_tmp; - - float score = 0.0; - - if ( (targ_word && !cand_word) || (!targ_word && cand_word) ) - score += 0.5; - else - if (targ_word && cand_word) - { - l_tmp = lisp_val(parent(targ_word,"Token")->f("dur",est_val(0))); - if(CONSP(l_tmp)) - targ_dur = get_c_float(car(l_tmp)); - else - targ_dur = get_c_float(l_tmp); - - cand_dur = parent(cand_word,"Token")->F("dur",0.0); - - if ( targ_dur != cand_dur ) - score += 0.5; - } - - if ( (next_targ_word && !next_cand_word) || (!next_targ_word && next_cand_word) ) - score += 0.5; - else - if(next_targ_word && next_cand_word) - { - l_tmp = lisp_val(parent(next_targ_word,"Token")->f("dur",est_val(0))); - if(CONSP(l_tmp)) - targ_dur = get_c_float(car(l_tmp)); - else - targ_dur = get_c_float(l_tmp); - cand_dur = parent(next_cand_word,"Token")->F("dur",0.0); - - if ( targ_dur != cand_dur ) - score += 0.5; - } + const EST_Item *targ_word = tc_get_word(targ); + const EST_Item *cand_word = tc_get_word(cand); + const EST_Item 
*next_targ_word = tc_get_word(inext(targ)); + const EST_Item *next_cand_word = tc_get_word(inext(cand)); + float targ_dur, cand_dur; + LISP l_tmp; + + float score = 0.0; + + if ( (targ_word && !cand_word) || (!targ_word && cand_word) ) + score += 0.5; + else + if (targ_word && cand_word) + { + l_tmp = lisp_val(parent(targ_word,"Token")->f("dur",est_val(0))); + if(CONSP(l_tmp)) + targ_dur = get_c_float(car(l_tmp)); + else + targ_dur = get_c_float(l_tmp); + + cand_dur = parent(cand_word,"Token")->F("dur",0.0); + + if ( targ_dur != cand_dur ) + score += 0.5; + } + + if ( (next_targ_word && !next_cand_word) || (!next_targ_word && next_cand_word) ) + score += 0.5; + else + if(next_targ_word && next_cand_word) + { + l_tmp = lisp_val(parent(next_targ_word,"Token")->f("dur",est_val(0))); + if(CONSP(l_tmp)) + targ_dur = get_c_float(car(l_tmp)); + else + targ_dur = get_c_float(l_tmp); + cand_dur = parent(next_cand_word,"Token")->F("dur",0.0); + + if ( targ_dur != cand_dur ) + score += 0.5; + } - return score; + return score; } float EST_SingingTargetCost::operator()(const EST_Item* targ, const EST_Item* cand) const { - set_targ_and_cand(targ,cand); - score = 0.0; - weight_sum = 0.0; - - score += add_weight(50.0)*pitch_cost(); - score += add_weight(50.0)*duration_cost(); - score += add_weight(5.0)*stress_cost(); - score += add_weight(5.0)*position_in_syllable_cost(); - score += add_weight(5.0)*position_in_word_cost(); - score += add_weight(5.0)*partofspeech_cost(); - score += add_weight(5.0)*position_in_phrase_cost(); - score += add_weight(5.0)*punctuation_cost(); - score += add_weight(4.0)*left_context_cost(); - score += add_weight(3.0)*right_context_cost(); - score += add_weight(2.0)*bad_duration_cost(); // see also join cost. 
+ set_targ_and_cand(targ,cand); + score = 0.0; + weight_sum = 0.0; + + score += add_weight(50.0)*pitch_cost(); + score += add_weight(50.0)*duration_cost(); + score += add_weight(5.0)*stress_cost(); + score += add_weight(5.0)*position_in_syllable_cost(); + score += add_weight(5.0)*position_in_word_cost(); + score += add_weight(5.0)*partofspeech_cost(); + score += add_weight(5.0)*position_in_phrase_cost(); + score += add_weight(5.0)*punctuation_cost(); + score += add_weight(4.0)*left_context_cost(); + score += add_weight(3.0)*right_context_cost(); + score += add_weight(2.0)*bad_duration_cost(); // see also join cost. - return score / weight_sum; + return score / weight_sum; } @@ -731,7 +728,7 @@ EST_Item *nn = as(s,"Intonation"); EST_Item *p; - for (p=daughter1(nn); p; p=p->next()) + for (p=daughter1(nn); p; p=inext(p)) if (p->name().contains("*")) return p->name(); return "NONE"; @@ -743,7 +740,7 @@ EST_Item *nn = as(s,"Intonation"); EST_Item *p; - for (p=daughter1(nn); p; p=p->next()) + for (p=daughter1(nn); p; p=inext(p)) { EST_String l = p->name(); if ((l.contains("%")) || (l.contains("-"))) diff -Nru festival-2.4~release/src/modules/MultiSyn/Makefile festival-2.5.0/src/modules/MultiSyn/Makefile --- festival-2.4~release/src/modules/MultiSyn/Makefile 2006-07-03 16:17:18.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/Makefile 2017-09-04 15:54:08.000000000 +0000 @@ -52,7 +52,7 @@ EST_TargetCost.h TargetCostRescoring.h \ EST_JoinCost.h EST_JoinCostCache.h \ DiphoneBackoff.h safety.h EST_DiphoneCoverage.h \ - EST_FlatTargetCost.h + EST_FlatTargetCost.h EST_HybridTargetCost.h SRCS = UnitSelection.cc \ @@ -61,7 +61,7 @@ EST_TargetCost.cc TargetCostRescoring.cc \ EST_JoinCost.cc EST_JoinCostCache.cc \ DiphoneBackoff.cc EST_DiphoneCoverage.cc \ - EST_FlatTargetCost.cc + EST_FlatTargetCost.cc EST_HybridTargetCost.cc OBJS = $(SRCS:.cc=.o) diff -Nru festival-2.4~release/src/modules/MultiSyn/TargetCostRescoring.cc 
festival-2.5.0/src/modules/MultiSyn/TargetCostRescoring.cc --- festival-2.4~release/src/modules/MultiSyn/TargetCostRescoring.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/TargetCostRescoring.cc 2017-09-04 15:54:08.000000000 +0000 @@ -58,60 +58,60 @@ void rescoreCandidates( EST_VTCandidate *candidates, float beam_width, float mult ) { - // first calculate the stats for the "model" - float dur = 0.0; - EST_Item *ph1 = 0; - EST_Item *ph2 = 0; - //EST_FVector *ph1_mid = 0; - //EST_FVector *ph2_mid = 0; + // first calculate the stats for the "model" + float dur = 0.0; + EST_Item *ph1 = 0; + EST_Item *ph2 = 0; + //EST_FVector *ph1_mid = 0; + //EST_FVector *ph2_mid = 0; - EST_TList scores; + EST_TList scores; - // get all scores to work out what durations are "suitable" - for( EST_VTCandidate *it = candidates; it != 0; it=it->next ){ - ph1 = it->s; - ph2 = ph1->next(); - // ph1_mid = fvector( ph1->f( "midcoef" ) ); - // ph2_mid = fvector( ph2->f( "midcoef" ) ); + // get all scores to work out what durations are "suitable" + for( EST_VTCandidate *it = candidates; it != 0; it=it->next ){ + ph1 = it->s; + ph2 = inext(ph1); + // ph1_mid = fvector( ph1->f( "midcoef" ) ); + // ph2_mid = fvector( ph2->f( "midcoef" ) ); - dur = getJoinTime(ph2) - getJoinTime(ph1); // duration of diphone unit - scores.append( ScorePair(it->score,dur, it) ); - } + dur = getJoinTime(ph2) - getJoinTime(ph1); // duration of diphone unit + scores.append( ScorePair(it->score,dur, it) ); + } - sort( scores ); - //cerr << scores << endl; + sort( scores ); + //cerr << scores << endl; - // calculate simple mean duration of some or all of candidates - float meandur = 0.0; - int n = 0; + // calculate simple mean duration of some or all of candidates + float meandur = 0.0; + int n = 0; - if( beam_width < 0 ){ // just average all of them - for( EST_Litem *li = scores.head(); li != 0; li = li->next() ){ - meandur += scores(li)._dur; - n++; + if( beam_width < 0 ){ // just average 
all of them + for( EST_Litem *li = scores.head(); li != 0; li = li->next() ){ + meandur += scores(li)._dur; + n++; + } + } + else{ + float score_cutoff = scores.first()._score + beam_width; + for( EST_Litem *li = scores.head(); li != 0; li = li->next() ){ + if( scores(li)._score > score_cutoff ) + break; + else{ + meandur += scores(li)._dur; + n++; + } + } } - } - else{ - float score_cutoff = scores.first()._score + beam_width; - for( EST_Litem *li = scores.head(); li != 0; li = li->next() ){ - if( scores(li)._score > score_cutoff ) - break; - else{ - meandur += scores(li)._dur; - n++; - } - } - } - meandur /= n; + meandur /= n; - // then tweak the scores based on that - for( EST_Litem *li = scores.head(); li != 0; li = li->next() ){ - float cand_dur = scores(li)._dur; - // cerr << scores(li)._cand->score << " "; - scores(li)._cand->score += (mult * abs( cand_dur - meandur ) ); - // cerr << scores(li)._cand->score << endl; - } + // then tweak the scores based on that + for( EST_Litem *li = scores.head(); li != 0; li = li->next() ){ + float cand_dur = scores(li)._dur; + // cerr << scores(li)._cand->score << " "; + scores(li)._cand->score += (mult * abs( cand_dur - meandur ) ); + // cerr << scores(li)._cand->score << endl; + } } ostream& operator << ( ostream& out, const ScorePair &sp ) diff -Nru festival-2.4~release/src/modules/MultiSyn/UnitSelection.cc festival-2.5.0/src/modules/MultiSyn/UnitSelection.cc --- festival-2.4~release/src/modules/MultiSyn/UnitSelection.cc 2014-12-11 15:24:37.000000000 +0000 +++ festival-2.5.0/src/modules/MultiSyn/UnitSelection.cc 2017-09-04 15:54:08.000000000 +0000 @@ -48,6 +48,7 @@ #include "EST_JoinCost.h" #include "EST_TargetCost.h" #include "EST_FlatTargetCost.h" +#include "EST_HybridTargetCost.h" #include "safety.h" @@ -88,7 +89,7 @@ EST_Item *it = u->relation("Unit")->first(); int i; for( i=1; i<=n && it!= 0; i++ ) - it=it->next(); + it=inext(it); if( i<=n ) EST_error( "unit number greater than number of items in unit relation") ; 
@@ -165,21 +166,27 @@ EST_String *uttExt, EST_String *wavExt, EST_String *pmExt, - EST_String *coefExt ) + EST_String *JCCoefExt, + EST_String *TCCoefExt) { int listlen = siod_llength( l_dataparams ); - if( listlen == 8 ){ + if( listlen == 8 || listlen == 9){ *uttExt = get_c_string( CAR1(CDR4(l_dataparams)) ); *wavExt = get_c_string( CAR2(CDR4(l_dataparams)) ); *pmExt = get_c_string( CAR3(CDR4(l_dataparams)) ); - *coefExt = get_c_string( CAR4(CDR4(l_dataparams)) ); + *JCCoefExt = get_c_string( CAR4(CDR4(l_dataparams)) ); + if( listlen == 8 ) + *TCCoefExt = EST_String::Empty; + else + *TCCoefExt = get_c_string( CAR5(CDR4(l_dataparams)) ); } else if( listlen == 4 ){ //set some defaults *uttExt = ".utt"; *wavExt = ".wav"; *pmExt = ".pm"; - *coefExt = ".coef"; + *JCCoefExt = ".coef"; + *TCCoefExt = EST_String::Empty; } else EST_error( "Incorrect number of voice data parameters" ); @@ -194,7 +201,7 @@ static LISP FT_make_du_voice( LISP l_bnames, LISP l_datadirs, LISP l_srate ) { EST_String uttDir, wavDir, pmDir, coefDir; - EST_String uttExt, wavExt, pmExt, coefExt; + EST_String uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt; int wav_srate = get_c_int( l_srate ); if( wav_srate <= 0 ) @@ -202,7 +209,7 @@ parseVoiceDataParams( l_datadirs, &uttDir, &wavDir, &pmDir, &coefDir, - &uttExt, &wavExt, &pmExt, &coefExt ); + &uttExt, &wavExt, &pmExt, &JCCoefExt, &TCCoefExt ); EST_StrList bnames; siod_list_to_strlist( l_bnames, bnames ); @@ -211,7 +218,7 @@ v = new DiphoneUnitVoice( bnames, uttDir, wavDir, pmDir, coefDir, static_cast(wav_srate), - uttExt, wavExt, pmExt, coefExt ); + uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt ); CHECK_PTR(v); @@ -222,7 +229,7 @@ static LISP FT_make_du_voice_module( LISP l_bnames, LISP l_datadirs, LISP l_srate ) { EST_String uttDir, wavDir, pmDir, coefDir; - EST_String uttExt, wavExt, pmExt, coefExt; + EST_String uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt; int wav_srate = get_c_int( l_srate ); if( wav_srate <= 0 ) @@ -230,7 +237,7 @@ 
parseVoiceDataParams( l_datadirs, &uttDir, &wavDir, &pmDir, &coefDir, - &uttExt, &wavExt, &pmExt, &coefExt ); + &uttExt, &wavExt, &pmExt, &JCCoefExt, &TCCoefExt ); EST_StrList bnames; siod_list_to_strlist( l_bnames, bnames ); @@ -239,7 +246,7 @@ vm = new DiphoneVoiceModule( bnames, uttDir, wavDir, pmDir, coefDir, static_cast(wav_srate), - uttExt, wavExt, pmExt, coefExt ); + uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt ); CHECK_PTR(vm); return siod( vm ); @@ -248,7 +255,7 @@ static LISP FT_voice_add_module( LISP l_duv, LISP l_bnames, LISP l_datadirs, LISP l_srate ) { EST_String uttDir, wavDir, pmDir, coefDir; - EST_String uttExt, wavExt, pmExt, coefExt; + EST_String uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt; int wav_srate = get_c_int( l_srate ); @@ -257,7 +264,7 @@ parseVoiceDataParams( l_datadirs, &uttDir, &wavDir, &pmDir, &coefDir, - &uttExt, &wavExt, &pmExt, &coefExt ); + &uttExt, &wavExt, &pmExt, &JCCoefExt, &TCCoefExt ); EST_StrList bnames; siod_list_to_strlist( l_bnames, bnames ); @@ -265,7 +272,7 @@ if( DiphoneUnitVoice* duv = dynamic_cast(voice(l_duv)) ){ if( ! 
duv->addVoiceModule(bnames, uttDir, wavDir, pmDir, coefDir, static_cast(wav_srate), - uttExt, wavExt, pmExt, coefExt ) ) + uttExt, wavExt, pmExt, JCCoefExt, TCCoefExt ) ) EST_error( "voice.addModule failed" ); } else @@ -443,6 +450,10 @@ tc = new EST_APMLTargetCost(); CHECK_PTR(tc); } + else if(streq(get_c_string(l_tc),"hybrid")){ + tc = new EST_HybridTargetCost(); + CHECK_PTR(tc); + } else if(streq(get_c_string(l_tc),"singing")){ tc = new EST_SingingTargetCost(); CHECK_PTR(tc); @@ -626,6 +637,20 @@ return NIL; } +static LISP FT_fill_target_coefficients(LISP l_voice, LISP l_utt, LISP l_trackfile) +{ + EST_Utterance *utt = get_c_utt(l_utt); + DiphoneUnitVoice *duv = dynamic_cast(voice(l_voice)); + EST_String filename = get_c_string(l_trackfile); + + EST_Track tcCoefs; + if( (tcCoefs.load(filename) != read_ok )) + EST_error( "Couldn't load data file %s", (const char*)filename ); + duv->fill_target_coefficients(utt, &tcCoefs); + + return l_utt; +} + void festival_MultiSyn_init(void) { @@ -804,6 +829,10 @@ "(du_voice.getDiphoneCoverage DU_VOICE FILENAME)\n\ prints diphone coverage information for this voice\n\ use filename '-' for stdout."); + + init_subr_3("multisyn_hybrid_fill_target_coefficients", FT_fill_target_coefficients, + "(multisyn_hybrid_fill_target_coefficients VOICE UTT TRACKFILE)\n\ + Use the voice to add the given target cost coefficients to the utterance."); } diff -Nru festival-2.4~release/src/modules/parser/pparser.cc festival-2.5.0/src/modules/parser/pparser.cc --- festival-2.4~release/src/modules/parser/pparser.cc 2013-04-11 14:19:05.000000000 +0000 +++ festival-2.5.0/src/modules/parser/pparser.cc 2017-09-04 15:54:08.000000000 +0000 @@ -120,14 +120,14 @@ EST_SCFG_Chart chart; chart.set_grammar_rules(rules); - for (st=u->relation("Token")->head(); st; st = st->next()) + for (st=u->relation("Token")->head(); st; st = inext(st)) { - for (et=st->next(); et; et=et->next()) + for (et=inext(st); et; et=inext(et)) if (wagon_predict(et,eos_tree) != 0) break; 
// Now find related words s = first_leaf(st)->as_relation("Word"); - e = first_leaf(et->next())->as_relation("Word"); + e = first_leaf(inext(et))->as_relation("Word"); chart.setup_wfst(s,e,"phr_pos"); chart.parse(); chart.extract_parse(u->relation("Syntax"),s,e,TRUE); @@ -159,13 +159,13 @@ // produce a parse wherever there is a sentence end marker or // the end of utterance. - for (w = s = u.relation("Word")->head(); w; w = w->next()) - if (w->f_present("sentence_end") || (w->next() == 0)) + for (w = s = u.relation("Word")->head(); w; w = inext(w)) + if (w->f_present("sentence_end") || (inext(w) == 0)) { - chart.setup_wfst(s, w->next(), "phr_pos"); + chart.setup_wfst(s, inext(w), "phr_pos"); chart.parse(); - chart.extract_parse(u.relation("Syntax"), s, w->next(), TRUE); - s = w->next(); + chart.extract_parse(u.relation("Syntax"), s, inext(w), TRUE); + s = inext(w); } } diff -Nru festival-2.4~release/src/modules/Text/text.cc festival-2.5.0/src/modules/Text/text.cc --- festival-2.4~release/src/modules/Text/text.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Text/text.cc 2017-09-04 15:54:08.000000000 +0000 @@ -241,7 +241,7 @@ t = ts.get(); tok = add_token(u,t); app_tok(tok); // do what you do with the token - ebo = as(tok,"Token")->prev(); // end but one token + ebo = iprev(as(tok,"Token")); // end but one token if ((ebo != 0) && (wagon_predict(ebo,eou_tree) == 1)) { diff -Nru festival-2.4~release/src/modules/Text/token.cc festival-2.5.0/src/modules/Text/token.cc --- festival-2.4~release/src/modules/Text/token.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Text/token.cc 2017-09-04 15:54:08.000000000 +0000 @@ -106,7 +106,7 @@ user_token_to_word_func = siod_get_lval("token_to_words",NULL); u->create_relation("Word"); - for (t=u->relation("Token")->first(); t != 0; t = t->next()) + for (t=u->relation("Token")->first(); t != 0; t = inext(t)) { if (user_token_to_word_func != NIL) { @@ -145,7 +145,7 @@ user_token_to_word_func = 
siod_get_lval("token_to_words",NULL); u->create_relation("Word"); - for (t=u->relation("Token")->first(); t != 0; t = t->next()) + for (t=u->relation("Token")->first(); t != 0; t = inext(t)) { words = word_it(t,t->name()); // Initial punctuation becomes words diff -Nru festival-2.4~release/src/modules/Text/token_pos.cc festival-2.5.0/src/modules/Text/token_pos.cc --- festival-2.4~release/src/modules/Text/token_pos.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Text/token_pos.cc 2017-09-04 15:54:08.000000000 +0000 @@ -54,7 +54,7 @@ trees = siod_get_lval("token_pos_cart_trees",NULL); if (trees == NIL) return utt; - for (t=u->relation("Token")->first(); t != 0; t = t->next()) + for (t=u->relation("Token")->first(); t != 0; t = inext(t)) { if (t->f("token_pos","0") == "0") for (l=trees; l != NIL; l=cdr(l)) // find a tree that matches diff -Nru festival-2.4~release/src/modules/Text/tok_ext.cc festival-2.5.0/src/modules/Text/tok_ext.cc --- festival-2.4~release/src/modules/Text/tok_ext.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/Text/tok_ext.cc 2017-09-04 15:54:08.000000000 +0000 @@ -141,7 +141,7 @@ for (i=0; i < lhc; i++) append_token(ps,EST_Token("*lhc*")); append_token(ps,ts.get()); - ns = ps.last(); + ns = ps.rlast(); for (i=0; i < rhc; i++) append_token(ps,ts.get()); return ns; @@ -153,7 +153,7 @@ append_token(ps,ts.get()); remove_item(ps.first(),"Token"); - return s->next(); + return inext(s); } static void append_token(EST_Relation &ps, const EST_Token &t) diff -Nru festival-2.4~release/src/modules/UniSyn/UniSyn.cc festival-2.5.0/src/modules/UniSyn/UniSyn.cc --- festival-2.4~release/src/modules/UniSyn/UniSyn.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn/UniSyn.cc 2017-09-04 15:54:08.000000000 +0000 @@ -165,7 +165,7 @@ { EST_Item *n = seg.tail()->insert_after(); n->set("name", ph_silence()); - n->set("end", seg.tail()->prev()->F("end") + 0.1); + n->set("end", iprev(seg.tail())->F("end") 
+ 0.1); } us_get_copy_wave(*utt, *sig, *pm, seg); @@ -354,7 +354,7 @@ u->relation("Segment")->clear(); - for (s = tu.relation("Segment")->head(); s; s = s->next()) + for (s = tu.relation("Segment")->head(); s; s = inext(s)) { t = u->relation("Segment")->append(); t->fset("name", s->fS("name")); diff -Nru festival-2.4~release/src/modules/UniSyn/us_features.cc festival-2.5.0/src/modules/UniSyn/us_features.cc --- festival-2.4~release/src/modules/UniSyn/us_features.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn/us_features.cc 2017-09-04 15:54:08.000000000 +0000 @@ -46,7 +46,7 @@ void add_feature_function(EST_Relation &r, const EST_String &fname, const EST_String &funcname) { - for (EST_Item *p = r.head(); p; p = p->next()) + for (EST_Item *p = r.head(); p; p = inext(p)) p->set_function(fname, funcname); } @@ -55,7 +55,7 @@ { EST_Features::Entries a; - for (EST_Item *p = s; p; p = p->next()) + for (EST_Item *p = s; p; p = inext(p)) { if (daughter1(p) != 0) { @@ -104,7 +104,7 @@ EST_Item *nth(EST_Relation &r, int n) { int i = 1; - for (EST_Item *s = r.head(); s; s = s->next(), ++i) + for (EST_Item *s = r.head(); s; s = inext(s), ++i) if (n == i) return s; diff -Nru festival-2.4~release/src/modules/UniSyn/us_mapping.cc festival-2.5.0/src/modules/UniSyn/us_mapping.cc --- festival-2.4~release/src/modules/UniSyn/us_mapping.cc 2014-12-18 15:48:03.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn/us_mapping.cc 2017-09-04 15:54:08.000000000 +0000 @@ -70,7 +70,7 @@ // //cout << "Source_pm" << source_pm.equal_space() << endl << endl; // //cout << "Target_pm" << target_pm.equal_space() << endl << endl; -// for (s = source_lab.head(); s; s = s->next()) +// for (s = source_lab.head(); s; s = inext(s)) // { // s_end = s->F("source_end"); // t_end = s->F("end"); @@ -104,120 +104,120 @@ EST_Track &source_pm, EST_Track &target_pm, EST_IVector &map) { - int i = 0; - int s_i_start, s_i_end, t_i_start, t_i_end; - EST_Item *s; - float s_end, s_start, t_end, 
t_start, m; - map.resize(target_pm.num_frames()); + int i = 0; + int s_i_start, s_i_end, t_i_start, t_i_end; + EST_Item *s; + float s_end, s_start, t_end, t_start, m; + map.resize(target_pm.num_frames()); - s_start = t_start = 0.0; - s_i_start = t_i_start = 0; + s_start = t_start = 0.0; + s_i_start = t_i_start = 0; - if (target_pm.t(target_pm.num_frames() - 1) < - source_lab.tail()->F("end",0)) + if (target_pm.t(target_pm.num_frames() - 1) < + source_lab.tail()->F("end",0)) { - EST_warning("Target pitchmarks end before end of target segment " - "timings (%f vs %f). Expect a truncated utterance\n", - target_pm.t(target_pm.num_frames() - 1), - source_lab.tail()->F("end",0.0)); + EST_warning("Target pitchmarks end before end of target segment " + "timings (%f vs %f). Expect a truncated utterance\n", + target_pm.t(target_pm.num_frames() - 1), + source_lab.tail()->F("end",0.0)); } - for (s = source_lab.head(); s; s = s->next()) + for (s = source_lab.head(); s; s = inext(s)) { - // printf( "*********************************************\nphone %s\n", s->S("name").str()); + // printf( "*********************************************\nphone %s\n", s->S("name").str()); - s_end = s->F("source_end"); - t_end = s->F("end"); + s_end = s->F("source_end"); + t_end = s->F("end"); - s_i_end = source_pm.index_below(s_end); - t_i_end = target_pm.index_below(t_end); + s_i_end = source_pm.index_below(s_end); + t_i_end = target_pm.index_below(t_end); - // make sure that at least one frame is available - if (s_i_end <= s_i_start) - s_i_end += 1; + // make sure that at least one frame is available + if (s_i_end <= s_i_start) + s_i_end += 1; -// printf("(s_i_start s_i_end t_i_start t_i_end) %d %d %d %d\n", -// s_i_start, s_i_end, t_i_start, t_i_end ); -// printf("(s_i_start-s_i_end t_i_start-t_i_end) %d %d\n", -// s_i_end - s_i_start, t_i_end - t_i_start); + // printf("(s_i_start s_i_end t_i_start t_i_end) %d %d %d %d\n", + // s_i_start, s_i_end, t_i_start, t_i_end ); + // 
printf("(s_i_start-s_i_end t_i_start-t_i_end) %d %d\n", + // s_i_end - s_i_start, t_i_end - t_i_start); - // OK for time alignment mapping function here to be single - // linear across subcomponents?... - // m = float(t_i_end-t_i_start+1)/float (s_i_end-s_i_start+1); - m = (t_end-t_start)/(s_end-s_start); - //m =1.0; - // printf( "m=%f\n", m ); + // OK for time alignment mapping function here to be single + // linear across subcomponents?... + // m = float(t_i_end-t_i_start+1)/float (s_i_end-s_i_start+1); + m = (t_end-t_start)/(s_end-s_start); + //m =1.0; + // printf( "m=%f\n", m ); - // time offsets for relative times - float apm_t_off = (s_i_start==0) ? 0.0 : source_pm.t(s_i_start-1); - float tpm_t_off = (t_i_start==0) ? 0.0 : target_pm.t(t_i_start-1); + // time offsets for relative times + float apm_t_off = (s_i_start==0) ? 0.0 : source_pm.t(s_i_start-1); + float tpm_t_off = (t_i_start==0) ? 0.0 : target_pm.t(t_i_start-1); -// printf("apm_t_off = %f\ntpm_t_off = %f\n", apm_t_off, tpm_t_off); + // printf("apm_t_off = %f\ntpm_t_off = %f\n", apm_t_off, tpm_t_off); - int apm_i = s_i_start; // analysis pitch mark index - float apm_t = source_pm.t(apm_i)-apm_t_off;// analysis pitch mark time - float next_apm_t = source_pm.t(apm_i+1)-apm_t_off; + int apm_i = s_i_start; // analysis pitch mark index + float apm_t = source_pm.t(apm_i)-apm_t_off;// analysis pitch mark time + float next_apm_t = source_pm.t(apm_i+1)-apm_t_off; - for( i=t_i_start; i<=t_i_end; ++i ){ - float tpm_t = target_pm.t(i)-tpm_t_off; // target pitch mark time + for( i=t_i_start; i<=t_i_end; ++i ){ + float tpm_t = target_pm.t(i)-tpm_t_off; // target pitch mark time - // find closest pitchmark (assume only need forward search from current - // point, since pitchmarks should always be increasing) - while( (apm_i<=s_i_end) && (fabs((next_apm_t*m)-tpm_t) <= fabs((apm_t*m)-tpm_t)) ){ -// printf("(next_apm_t apm_t) %f %f\n", -// fabs((next_apm_t*m)-tpm_t), fabs((apm_t*m)-tpm_t) ); - apm_t = next_apm_t; - 
++apm_i; - next_apm_t = source_pm.t(apm_i+1)-apm_t_off; - } + // find closest pitchmark (assume only need forward search from current + // point, since pitchmarks should always be increasing) + while( (apm_i<=s_i_end) && (fabs((next_apm_t*m)-tpm_t) <= fabs((apm_t*m)-tpm_t)) ){ + // printf("(next_apm_t apm_t) %f %f\n", + // fabs((next_apm_t*m)-tpm_t), fabs((apm_t*m)-tpm_t) ); + apm_t = next_apm_t; + ++apm_i; + next_apm_t = source_pm.t(apm_i+1)-apm_t_off; + } -// // printf( "tpm %d = apm %d\n", i, apm_i ); + // // printf( "tpm %d = apm %d\n", i, apm_i ); -// int slow_index = source_pm.index( target_pm(i) ); + // int slow_index = source_pm.index( target_pm(i) ); -// printf( "(my slow) %d %d\n", apm_i, slow_index ); + // printf( "(my slow) %d %d\n", apm_i, slow_index ); - map[i] = apm_i; - } + map[i] = apm_i; + } - // for next loop - if (s->next()) - { - s_i_start = s_i_end+1; - t_i_start = t_i_end+1; - s_start = source_pm.t(s_i_start); - t_start = target_pm.t(t_i_start); - } + // for next loop + if (inext(s)) + { + s_i_start = s_i_end+1; + t_i_start = t_i_end+1; + s_start = source_pm.t(s_i_start); + t_start = target_pm.t(t_i_start); + } } - if (i == 0) - map.resize(0); // nothing to synthesize - else - map.resize(i); + if (i == 0) + map.resize(0); // nothing to synthesize + else + map.resize(i); } void make_linear_mapping(EST_Track &pm, EST_IVector &map) { - int pm_num_frames = pm.num_frames(); + int pm_num_frames = pm.num_frames(); - map.resize(pm_num_frames); + map.resize(pm_num_frames); - for (int i = 0; i < pm_num_frames; ++i) - map[i] = i; + for (int i = 0; i < pm_num_frames; ++i) + map[i] = i; } static bool contiguous( const EST_Item*left, const EST_Item* right ) { - if( (item(left->f("source_ph1")))->next() == item(right->f("source_ph1")) ) - return true; + if( (inext(item(left->f("source_ph1")))) == item(right->f("source_ph1")) ) + return true; - return false; + return false; } @@ -269,9 +269,9 @@ voicing[i] = 0; } // middle loop - for( EST_Item 
*diphone_right=diphone_left->next(); + for( EST_Item *diphone_right=inext(diphone_left); diphone_right; - diphone_right=diphone_left->next() ){ + diphone_right=inext(diphone_left) ){ printf( "%s\t%f\n", diphone_left->S("name").str(), diphone_left->F("end")); @@ -284,7 +284,7 @@ right_start_index, right_end_index ); - EST_String join_phone_name = item(diphone_left->f("ph1"))->next()->S("name"); + EST_String join_phone_name = inext(item(diphone_left->f("ph1")))->S("name"); cerr << "phone contigous " << contiguous(diphone_left,diphone_right) << endl; @@ -461,9 +461,9 @@ voicing[i] = 0; } // middle loop - for( EST_Item *diphone_right=diphone_left->next(); + for( EST_Item *diphone_right=inext(diphone_left); diphone_right; - diphone_right=diphone_left->next() ){ + diphone_right=inext(diphone_left) ){ printf( "%s\t%f\n", diphone_left->S("name").str(), diphone_left->F("end")); @@ -476,7 +476,7 @@ right_start_index, right_end_index ); - EST_String join_phone_name = item(diphone_left->f("ph1"))->next()->S("name"); + EST_String join_phone_name = inext(item(diphone_left->f("ph1")))->S("name"); cerr << "phone contigous " << contiguous(diphone_left,diphone_right) << endl; @@ -691,10 +691,10 @@ EST_Item *last_s = 0; - for (s = u->relation("smap")->head(); s; s = s->next()) + for (s = u->relation("smap")->head(); s; s = inext(s)) { int n = s->I("index"); - for (t = u->relation("tmap")->head(); t; t = t->next()) + for (t = u->relation("tmap")->head(); t; t = inext(t)) { if (map(t->I("index")) == n) { @@ -728,7 +728,7 @@ "timings. Expect a truncated utterance.\n"); for (s = source_lab.head(), t = target_lab.head(); s && t; - s = s->next(), t = t->next()) + s = inext(s), t = inext(t)) { if (s->S("name") != t->S("name")) cerr << "Warning: Source and Target segment names do not match: " @@ -778,14 +778,14 @@ s_start = t_start = 0.0; // should really be replaced by feature functions. 
- for (prev_end = 0.0, s = source_lab.head(); s; s = s->next()) + for (prev_end = 0.0, s = source_lab.head(); s; s = inext(s)) { s->set("start", prev_end); prev_end = s->F("end"); } // should really be replaced by feature functions. - for (prev_end = 0.0, s = target_lab.head(); s; s = s->next()) + for (prev_end = 0.0, s = target_lab.head(); s; s = inext(s)) { s->set("start", prev_end); prev_end = s->F("end"); @@ -796,14 +796,14 @@ EST_warning("Target pitchmarks end before end of target segment " "timings. Expect a truncated utterance.\n"); - for (s = source_lab.head(); s; s = s->next()) + for (s = source_lab.head(); s; s = inext(s)) { s_start = s->F("start"); cout << "source: " << *s << endl; while (s && (!s->in_relation(match_name))) - s = s->next(); + s = inext(s); cout << "active source: " << *s << endl; diff -Nru festival-2.4~release/src/modules/UniSyn/us_prosody.cc festival-2.5.0/src/modules/UniSyn/us_prosody.cc --- festival-2.4~release/src/modules/UniSyn/us_prosody.cc 2014-12-20 15:45:57.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn/us_prosody.cc 2017-09-04 15:54:08.000000000 +0000 @@ -353,7 +353,7 @@ target_pm.resize(1000000, 0); s_start = 0.0; - for (s = guide.head(); s; s = s->next()) + for (s = guide.head(); s; s = inext(s)) { s_end = s->F("end", 1); if (s->fI("use_pm") == 1) @@ -397,7 +397,7 @@ cout << "tag: " << target_seg << endl; - for (t = target_seg.head(); t; t = t->next()) + for (t = target_seg.head(); t; t = inext(t)) { s = daughter1(t,"Match"); if (s == 0) // ie extra phone in target specification diff -Nru festival-2.4~release/src/modules/UniSyn/us_unit.cc festival-2.5.0/src/modules/UniSyn/us_unit.cc --- festival-2.4~release/src/modules/UniSyn/us_unit.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn/us_unit.cc 2017-09-04 15:54:08.000000000 +0000 @@ -267,7 +267,7 @@ float scale; EST_WindowFunc *window_function; - for (u = unit_stream.head(); u; u = u->next()) + for (u = unit_stream.head(); u; u = inext(u)) num 
+= track(u->f("coefs"))->num_frames(); frames.resize(num); @@ -279,7 +279,7 @@ window_function = EST_Window::creator(window_name); - for (i = 0, u = unit_stream.head(); u; u = u->next()) + for (i = 0, u = unit_stream.head(); u; u = inext(u)) { sig = wave(u->f("sig")); coefs = track(u->f("coefs")); @@ -336,7 +336,7 @@ utt.create_relation("TmpSegment"); - for (s = source_seg.head(); s; s = s->next()) + for (s = source_seg.head(); s; s = inext(s)) { n = utt.relation("TmpSegment")->append(); merge_features(n, s, 0); @@ -367,7 +367,7 @@ { EST_Wave *sig; - for (EST_Item *s = unit.head(); s; s = s->next()) + for (EST_Item *s = unit.head(); s; s = inext(s)) { sig = wave(s->f("sig")); if (s->f_present("energy_factor")) @@ -392,7 +392,7 @@ sig->fill(0); j = 0; - for (EST_Item *s = utt.relation("Unit", 1)->head(); s; s = s->next()) + for (EST_Item *s = utt.relation("Unit", 1)->head(); s; s = inext(s)) { unit_sig = wave(s->f("sig")); unit_coefs = track(s->f("coefs")); @@ -447,7 +447,7 @@ } else{ EST_Track *t = 0; - for ( ; u; u = u->next()) + for ( ; u; u = inext(u)) { t = track(u->f("coefs")); num_source_frames += t->num_frames(); @@ -460,7 +460,7 @@ prev_time = 0.0; // copy basic information - for (i = 0, l = 0, u = unit_stream.head(); u; u = u->next()) + for (i = 0, l = 0, u = unit_stream.head(); u; u = inext(u)) { coefs = track(u->f("coefs")); @@ -573,14 +573,14 @@ FILE *ofile = fopen( "./join_times.est", "w" ); EST_Relation *units = utt->relation("Unit"); - for( EST_Item *u=units->head(); u; u=u->next() ){ + for( EST_Item *u=units->head(); u; u=inext(u) ){ EST_Item *diphone_left = u; - // EST_Item *diphone_right = u->next(); + // EST_Item *diphone_right = inext(u); fprintf( ofile, "%s\t%f\n", diphone_left->S("name").str(), diphone_left->F("end")); - EST_Item *join_phone_left = item(diphone_left->f("ph1"))->next(); + EST_Item *join_phone_left = inext(item(diphone_left->f("ph1"))); EST_String phone_name = join_phone_left->S("name"); if( ph_is_sonorant( phone_name ) && 
!ph_is_silence( phone_name )){ diff -Nru festival-2.4~release/src/modules/UniSyn_diphone/us_diphone_index.cc festival-2.5.0/src/modules/UniSyn_diphone/us_diphone_index.cc --- festival-2.4~release/src/modules/UniSyn_diphone/us_diphone_index.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_diphone/us_diphone_index.cc 2017-09-04 15:54:08.000000000 +0000 @@ -151,7 +151,7 @@ utt.relation("Unit")->f.set("sig_ext", diph_index->sig_ext); } - for (p = p->next(); p; p = p->next()) + for (p = inext(p); p; p = inext(p)) { d = utt.relation("Unit")->append(); name2 = get_diphone_name(p,"right"); @@ -161,7 +161,7 @@ } // utt.create_relation("SourceSegments"); -// for (p = utt.relation("Segment", 1)->head(); p; p = p->next()) +// for (p = utt.relation("Segment", 1)->head(); p; p = inext(p)) // { // d = utt.relation("SourceSegments")->append(); // d->set_name(p->name()); @@ -209,7 +209,7 @@ if ((fp = fopen(group_file, "wb")) == NULL) EST_error("US DB: failed to open group file as temporary file\n"); - for (d = diphone.head(); d; d = d->next()) + for (d = diphone.head(); d; d = inext(d)) { sig = wave(d->f("sig")); tr = track(d->f("coefs")); @@ -238,7 +238,7 @@ fprintf(fp, "sig_file_format %s\n",(const char *)sig_file_format); fprintf(fp, "EST_Header_End\n"); - for (d = diphone.head(); d; d = d->next()) + for (d = diphone.head(); d; d = inext(d)) fprintf(fp, "%s %d %d %d\n", (const char *)d->S("name"), d->I("track_start"), d->I("wave_start"), d->I("middle_frame")); @@ -536,7 +536,7 @@ int samp_start, samp_end; float start_time; - for (s = unit.head(); s; s = s->next()) + for (s = unit.head(); s; s = inext(s)) { sub_coefs = new EST_Track; diff -Nru festival-2.4~release/src/modules/UniSyn_diphone/us_diphone_unit.cc festival-2.5.0/src/modules/UniSyn_diphone/us_diphone_unit.cc --- festival-2.4~release/src/modules/UniSyn_diphone/us_diphone_unit.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_diphone/us_diphone_unit.cc 2017-09-04 
15:54:08.000000000 +0000 @@ -49,7 +49,7 @@ { float prev_end = 0; - for (EST_Item *p = r.head(); p ; p = p->next()) + for (EST_Item *p = r.head(); p ; p = inext(p)) { p->set("end", p->F("dur") + prev_end); prev_end = p->F("end"); @@ -113,8 +113,8 @@ float t_time = 0.0, end; p_time = 0.0; - for (s = source_lab.head(), u = diphone_stream.head(); u; u = u->next(), - s = s->next()) + for (s = source_lab.head(), u = diphone_stream.head(); u; u = inext(u), + s = inext(s)) { pm = track(u->f("coefs")); @@ -216,7 +216,7 @@ // go to the periods before and after samp_start = (int)(full_coefs.t(Gof((pm_start - 1), 0)) * (float)full_sig.sample_rate()); - if (pm_end+1 < full_coefs.num_frames()) + if (pm_end+2 < full_coefs.num_frames()) pm_end++; samp_end = (int)(full_coefs.t(pm_end) * (float)full_sig.sample_rate()); diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/mettree.cc festival-2.5.0/src/modules/UniSyn_phonology/mettree.cc --- festival-2.4~release/src/modules/UniSyn_phonology/mettree.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_phonology/mettree.cc 2017-09-04 15:54:08.000000000 +0000 @@ -125,43 +125,9 @@ void subword_metrical_tree(EST_Item *w, EST_Relation &syllable, EST_Relation &metricaltree); - -/*void phonemic_trans(EST_Relation &trans) -{ - EST_Item *s, *n; - EST_String a; - -// cout << "trans: " << trans << endl; - - for (s = trans.head(); s; s = s->next()) - { - n = s->next(); -// cout << *s; - if (s->S("name").contains("cl")) - { - a = s->S("name").before("cl"); - if ((next(s) != 0) && (next(s)->S("name") == a)) - trans.remove_item(s); - else if ((next(s) != 0) && (a == "dcl" ) - && (next(s)->S("name") == "jh")) - trans.remove_item(s); - else if ((next(s) != 0) && (a == "tcl" ) - && (next(s)->S("name") == "ch")) - trans.remove_item(s); - else - s->set("name", a); -// cout << "here1: " << a << "\n"; -// s->set("name", s->S("name").before("cl")); - - } - } -} -*/ - - EST_Item *prev_match(EST_Item *n) { - EST_Item *p = 
n->prev(); + EST_Item *p = iprev(n); if (p == 0) return 0; @@ -184,7 +150,7 @@ } p = p->as_relation("SurfacePhone"); - pp_end = (prev(p) != 0) ? prev(p)->F("end",0.0) : 0.0; + pp_end = (iprev(p) != 0) ? iprev(p)->F("end",0.0) : 0.0; s = p->insert_after(); @@ -237,7 +203,7 @@ n = (float)phone.length(); div = dur/n; - for (i = 0, s = phone.head(); s; s = s->next(), ++i) + for (i = 0, s = phone.head(); s; s = inext(s), ++i) { s->set("start", start + div * (float) i); s->set("end", start + div * (float) (i + 1)); @@ -258,7 +224,7 @@ utt.create_relation("IntonationPhrase"); - for (s = utt.relation("MetricalTree", 1)->head(); s; s = s->next()) + for (s = utt.relation("MetricalTree", 1)->head(); s; s = inext(s)) { for (r = first_leaf_in_tree(s); r != next_leaf(last_leaf_in_tree(s)); r = next_leaf(r)) @@ -279,7 +245,7 @@ if (first_accent) { cout << "first accent: " << *first_accent << endl; - a = first_accent->prev(); + a = iprev(first_accent); if (a->S("name","") != "phrase_start") a = first_accent->insert_before(); @@ -308,7 +274,7 @@ if (last_accent) { cout << "last accent: " << *last_accent << endl; - a = last_accent->next(); + a = inext(last_accent); if (a->S("name","") != "phrase_end") a = last_accent->insert_after(); } @@ -332,7 +298,7 @@ // now join any other marked phrase_start/ends to intermediate // nodes in metrical tree. 
- /* for (s = u.relation("Intonation", 1)->head(); s; s = s->next()) + /* for (s = u.relation("Intonation", 1)->head(); s; s = inext(s)) { if (!s->in_relation("IntonationPhrase") && !s->in_relation("IntonationSyllable")) @@ -356,7 +322,7 @@ pos = t->F("time"); max = 100000.0; - for (p = utt.relation("Syllable")->head(); p; p = p->next()) + for (p = utt.relation("Syllable")->head(); p; p = inext(p)) { if (t->S("name") == "phrase_end") d = fabs(pos - p->F("end")); @@ -408,7 +374,7 @@ // cout << "surface: " << surface << endl; - for (s = lexical.head(); s; s = s->next()) + for (s = lexical.head(); s; s = inext(s)) { if ((t = daughter1(s->as_relation("Match"))) != 0) { @@ -433,12 +399,12 @@ lexical.tail()->set("start", last_end); } - for (s = lexical.head(); s; s = s->next()) + for (s = lexical.head(); s; s = inext(s)) { if (!s->f_present("end")) { // cout << "missing end feature for " << *s << endl; - for (i = 1, p = s; p; p = p->next(), ++i) + for (i = 1, p = s; p; p = inext(p), ++i) if (p->f_present("end")) break; inc = (p->F("end") - prev_end) / ((float) i); @@ -446,7 +412,7 @@ // cout << "stop phone is " << *p << endl; - for (i = 1; s !=p ; s = s->next(), ++i) + for (i = 1; s !=p ; s = inext(s), ++i) { s->set("end", (prev_end + ((float) i * inc))); s->set("start", (prev_end + ((float) (i - 1 )* inc))); @@ -519,7 +485,7 @@ move_sub_tree(d, t); } - for (EST_Item *p = daughter1(t); p; p = p->next()) + for (EST_Item *p = daughter1(t); p; p = inext(p)) binaryize_tree(p); } @@ -529,7 +495,7 @@ utt.create_relation(new_tree); copy_relation(*utt.relation(base_tree), *utt.relation(new_tree)); - for (EST_Item *p = utt.relation(new_tree)->head(); p; p = p->next()) + for (EST_Item *p = utt.relation(new_tree)->head(); p; p = inext(p)) binaryize_tree(p); } @@ -555,7 +521,7 @@ add_feature_function(*utt.relation("Syllable"), "start", "standard+unisyn_leaf_start"); - for (EST_Item *s = utt.relation("Syllable")->head(); s; s = s->next()) + for (EST_Item *s = 
utt.relation("Syllable")->head(); s; s = inext(s)) s->set("time_path", "SylStructure"); EST_Features tf; @@ -617,7 +583,7 @@ m = m->as_relation(second_tree); // swap to a new tree } - for (d = daughter1(m); d; d = d->next()) + for (d = daughter1(m); d; d = inext(d)) { e = p->append_daughter(d); extend_tree(d, e, terminal, second_tree); @@ -651,7 +617,7 @@ { EST_Item *n; - for (n = u.relation(tree)->head(); n; n = n->next()) + for (n = u.relation(tree)->head(); n; n = inext(n)) nsr(n); } @@ -737,7 +703,7 @@ int depth; int max_depth = 0; - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) { depth = 0; for (a = s->as_relation(mettree); parent(a); a = parent(a)) @@ -754,7 +720,7 @@ EST_Item *s; int max_depth = max_tree_depth(u, base_stream, mettree); - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) stress_factor1(s->as_relation(mettree), max_depth); } @@ -801,12 +767,12 @@ // normalise values sv = 0; - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) sv = Lof(s->I("StressVal"), sv); cout << "Max Stress: " << sv << endl; - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) { b = (float)(s->I("StressVal") - sv + 1); if (s->f("MetricalValue") == "s") @@ -822,14 +788,14 @@ EST_Item *s; float max_pf = 0; - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) phrase_factor(*s, mettree); - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) if (s->I("PhraseIndex") > max_pf) max_pf = s->I("PhraseIndex"); - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) { s->set("PhraseFactor", 
(float)s->I("PhraseIndex")/max_pf); @@ -848,7 +814,7 @@ EST_Item *w; EST_Item *a, *b, *c, *od; - for (w = u.relation("Word")->head(); w != 0; w = w->next()) + for (w = u.relation("Word")->head(); w != 0; w = inext(w)) { if (w->f("pos") == "punc") { @@ -870,7 +836,7 @@ cout << "Threshold = " << threshold << endl; - for (s = u.relation(base_stream)->head(); s; s = s->next()) + for (s = u.relation(base_stream)->head(); s; s = inext(s)) { if (s->F("StressFactor") > threshold) { @@ -929,7 +895,7 @@ word.create_relation("Syllable"); word.create_relation("WordStructure"); - for (w = u.relation("Word")->head(); w != 0; w = w->next()) + for (w = u.relation("Word")->head(); w != 0; w = inext(w)) { word.clear_relations(); @@ -969,10 +935,10 @@ for (n = start; n; n = parent(n)) { // cout << "altering sister\n"; - if (prev(n) != 0) - prev(n)->set("MetricalValue", "w"); - else if (next(n) != 0) - next(n)->set("MetricalValue", "w"); + if (iprev(n) != 0) + iprev(n)->set("MetricalValue", "w"); + else if (inext(n) != 0) + inext(n)->set("MetricalValue", "w"); } } @@ -1092,7 +1058,7 @@ for (w = u->relation("Word")->head(); w != 0; w = n) { - n = w->next(); + n = inext(w); // w->set("start", prev_end); w->f_remove("end"); // prev_end = w->F("end"); @@ -1145,7 +1111,7 @@ if (segfile != "dummy") { - for (s = u->relation("Segment")->head(); s; s = s->next()) + for (s = u->relation("Segment")->head(); s; s = inext(s)) { s->set("start", phone_start); phone_start = s->F("end"); @@ -1164,7 +1130,7 @@ // cout <<"Surface 1:" << *u->relation("SurfacePhone") << endl; - for (i = 0, w = u->relation("Word")->head(); w != 0; w = w->next(), ++i) + for (i = 0, w = u->relation("Word")->head(); w != 0; w = inext(w), ++i) { word.clear_relations(); diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/subword.cc festival-2.5.0/src/modules/UniSyn_phonology/subword.cc --- festival-2.4~release/src/modules/UniSyn_phonology/subword.cc 2010-11-05 14:13:02.000000000 +0000 +++ 
festival-2.5.0/src/modules/UniSyn_phonology/subword.cc 2017-09-04 15:54:08.000000000 +0000 @@ -65,10 +65,10 @@ n = metricaltree.append(w); - if (next(syllable.head()) == 0) + if (inext(syllable.head()) == 0) return; - for (s = syllable.head(); s ; s = s->next()) + for (s = syllable.head(); s ; s = inext(s)) { cout << "appending syl\n"; n->append_daughter(s); @@ -82,14 +82,14 @@ EST_Item *new_leaf; // single syllable - if (next(syllable.head()) == 0) + if (inext(syllable.head()) == 0) { new_leaf = metricaltree.append(w); return; } // absorb initial unstressed syllables - for (s = syllable.head(); s && (s->f("stress_num") == 0); s = s->next()) + for (s = syllable.head(); s && (s->f("stress_num") == 0); s = inext(s)) { new_leaf = metricaltree.append(s); new_leaf->set("MetricalValue", "w"); @@ -99,14 +99,14 @@ { new_leaf = metricaltree.append(s); new_leaf->set("MetricalValue", "s"); - s = make_foot(w, new_leaf, next(s)); + s = make_foot(w, new_leaf, inext(s)); } if (siod_get_lval("mettree_debug", NULL) != NIL) metricaltree.utt()->save("foot.utt", "est"); s = metricaltree.head(); - make_super_foot(w, s, next(s)); + make_super_foot(w, s, inext(s)); if (siod_get_lval("mettree_debug", NULL) != NIL) metricaltree.utt()->save("super_foot.utt", "est"); @@ -130,11 +130,11 @@ fl = first_leaf(met_node); - if (next(next_syl_node)) + if (inext(next_syl_node)) new_parent = met_node->insert_parent(); else { - if (prev(fl)) + if (iprev(fl)) new_parent = met_node->insert_parent(); else { @@ -150,7 +150,7 @@ } new_parent->append_daughter(next_syl_node); - next_syl_node = make_foot(w, new_parent, next(next_syl_node)); + next_syl_node = make_foot(w, new_parent, inext(next_syl_node)); } return next_syl_node; } @@ -165,7 +165,7 @@ return; // make sure root node is w, i.e. 
word - if (next(next_syl_node)) + if (inext(next_syl_node)) new_parent = met_node->insert_parent(); else { @@ -182,7 +182,7 @@ new_parent->append_daughter(next_syl_node); - make_super_foot(w, new_parent, next(new_parent)); + make_super_foot(w, new_parent, inext(new_parent)); } @@ -191,7 +191,7 @@ EST_Item *n, *s; int stress_num = -1; - for (s = syllable.head(); s; s = s->next()) + for (s = syllable.head(); s; s = inext(s)) if (s->I("stress_num",0) > stress_num) stress_num = s->I("stress_num"); @@ -199,7 +199,7 @@ for (; stress_num > 0; --stress_num) { - for (s = syllable.head(); s; s = s->next()) + for (s = syllable.head(); s; s = inext(s)) if (s->I("stress_num",0) == stress_num) break; diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/syllabify.cc festival-2.5.0/src/modules/UniSyn_phonology/syllabify.cc --- festival-2.4~release/src/modules/UniSyn_phonology/syllabify.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_phonology/syllabify.cc 2017-11-03 12:54:01.000000000 +0000 @@ -47,7 +47,7 @@ static int nucleus_count(EST_Relation &phone) { int v = 0; - for (EST_Item *l = phone.head(); l; l = l->next()) + for (EST_Item *l = phone.head(); l; l = inext(l)) if (l->S("df.syllabic") == "+") ++v; @@ -112,7 +112,7 @@ EST_Item *p, *onset; // if first syllable in word, put all prevocalic segments in onset - if ((prev(syl_struct_root) == 0) && prev(nucleus)) + if ((iprev(syl_struct_root) == 0) && iprev(nucleus)) { if (flat) onset = syl_struct_root; @@ -123,23 +123,23 @@ } // tmpeorary hack because of lack of prepend daughter fn. 
EST_Item *s; - for (s = nucleus->prev(); prev(s); s = s->prev()); - for (c1 = s; c1 != nucleus; c1 = next(c1)) + for (s = iprev(nucleus); iprev(s); s = iprev(s)); + for (c1 = s; c1 != nucleus; c1 = inext(c1)) onset->append_daughter(c1); return 0; } - c1 = prev(nucleus); + c1 = iprev(nucleus); if (c1 == 0) return c1; // if (ph_is_vowel(c1->name())) -// return next(c1); +// return inext(c1); if (c1->S("df.syllabic") == "+") - return next(c1); + return inext(c1); // if (c1->S("df.type") == "vowel") -// return next(c1); +// return inext(c1); if (flat) onset = syl_struct_root; @@ -156,7 +156,7 @@ return nucleus; // add second consonant - c2 = prev(c1); + c2 = iprev(c1); if (c2 == 0) return 0; if (legal_c2(c1, c2)) @@ -165,7 +165,7 @@ return c1; // add third consonant (s) - c3 = prev(c2); + c3 = iprev(c2); if (c3 == 0) return 0; @@ -222,7 +222,7 @@ } for (; (first_coda != 0) && (first_coda != first_onset); - first_coda = first_coda->next()) + first_coda = inext(first_coda)) m->append_daughter(first_coda); } @@ -247,7 +247,7 @@ if (count < 1) return 0; - for (prev_syl = 0, l = phone.head(); l; l = l->next()) + for (prev_syl = 0, l = phone.head(); l; l = inext(l)) { // cout << "type " << l->S("name") << ": " << l->S("df.type") << endl; if (l->S("df.syllabic") == "+") @@ -272,7 +272,7 @@ prev_syl = this_syl; prev_struct = this_struct; - first_coda = l->next(); + first_coda = inext(l); } } @@ -341,7 +341,7 @@ else siod_list_to_strlist(car(lpos), lex_def); - for (EST_Litem *sl = lex_def.head(); sl; sl = sl->next()) + for (EST_Litem *sl = lex_def.head(); sl; sl = sl->n) { p = phone.append(); lex_phone = lex_def(sl); @@ -381,7 +381,7 @@ int v = 0; EST_Litem *l; - for (l = full.head(); l; l = l->next()) + for (l = full.head(); l; l = next(l)) if (ph_is_stress_vowel(full(l))) ++v; return v; @@ -468,7 +468,7 @@ static int vowel_count(EST_Relation &phone) { int v = 0; - for (EST_Item *l = phone.head(); l; l = l->next()) + for (EST_Item *l = phone.head(); l; l = inext(l)) if 
(ph_is_vowel(l->name())) ++v; return v; @@ -489,7 +489,7 @@ festival_error(); } - for (prev_syl = 0, l = word.relation("Phone")->head(); l; l = l->next()) + for (prev_syl = 0, l = word.relation("Phone")->head(); l; l = inext(l)) { cout << "syl: " << l->S("name") << ": " << l->S("df.syllabic", 1) << endl; @@ -508,7 +508,7 @@ prev_syl->as_relation("SylStructure")); prev_syl = this_syl; - first_coda = l->next(); + first_coda = inext(l); } } @@ -558,7 +558,7 @@ // make phone relation siod_list_to_strlist(car(lpos), lex_def); - for (EST_Litem *sl = lex_def.head(); sl; sl = sl->next()) + for (EST_Litem *sl = lex_def.head(); sl; sl = inext(sl)) { p = word.relation("Phone")->append(); lex_phone = lex_def(sl); @@ -615,7 +615,7 @@ // absorb initial unstressed syllables for (s = word.relation("Syllable")->head(); - s && (s->f("stress_num") == 0); s = s->next()) + s && (s->f("stress_num") == 0); s = inext(s)) { // cout << "**1 syl:" << *s << endl; new_leaf = word.relation("MetricalTree")->append(s); @@ -625,7 +625,7 @@ // cout << "utt to now 1c: " << word << endl; // In a multi-syllable word - if (next(word.relation("Syllable")->head())) + if (inext(word.relation("Syllable")->head())) { //s = word.relation("Syllable")->head(); // cout << "**2 syl:" << *s << endl; @@ -635,7 +635,7 @@ cout << "foot iteration\n" << *s << endl << endl; new_leaf = word.relation("MetricalTree")->append(s); new_leaf->set("MetricalValue", "s"); - // s = make_foot(new_leaf, next(s)); + // s = make_foot(new_leaf, inext(s)); } } @@ -653,7 +653,7 @@ s = word.relation("MetricalTree")->head(); - // make_super_foot(s, next(s)); + // make_super_foot(s, inext(s)); if (siod_get_lval("mettree_debug", NULL) != NIL) word.save("super_foot.utt", "est"); diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/UniSyn_build.cc festival-2.5.0/src/modules/UniSyn_phonology/UniSyn_build.cc --- festival-2.4~release/src/modules/UniSyn_phonology/UniSyn_build.cc 2010-11-05 14:13:02.000000000 +0000 +++ 
festival-2.5.0/src/modules/UniSyn_phonology/UniSyn_build.cc 2017-11-03 12:54:01.000000000 +0000 @@ -127,10 +127,10 @@ EST_Item *t, *p; int r; - prev_phone = w->prev() ? prev(w)->I("phon_ref") : -1; + prev_phone = iprev(w) ? iprev(w)->I("phon_ref") : -1; r = w->I("phon_ref"); - for (t = trans.head(); t; t = t->next()) + for (t = trans.head(); t; t = inext(t)) { if ((t->f("name") == "sil") || (t->f("name") == "pau")) continue; @@ -163,7 +163,7 @@ if (add_words) utt.create_relation("IntonationWord"); - for (t = utt.relation(i_name, 1)->head(); t; t = t->next()) + for (t = utt.relation(i_name, 1)->head(); t; t = inext(t)) { t->f_remove("end"); if (!t->f_present("word_ref")) @@ -173,7 +173,7 @@ w_num = t->S("word_ref"); s_num = t->I("syl_num"); - for (w = utt.relation("Word", 1)->head(); w; w = w->next()) + for (w = utt.relation("Word", 1)->head(); w; w = inext(w)) { if (w->S("id") == w_num) break; @@ -242,7 +242,7 @@ utt.create_relation("IntonationWord"); - for (s = utt.relation("Syllable", 1)->head(); s; s = s->next()) + for (s = utt.relation("Syllable", 1)->head(); s; s = inext(s)) { if (!s->in_relation("IntonationSyllable")) continue; @@ -260,7 +260,7 @@ else b = w->as_relation("IntonationWord"); - for (t = daughter1(s->as_relation("IntonationSyllable")); t; t = t->next()) + for (t = daughter1(s->as_relation("IntonationSyllable")); t; t = inext(t)) b->append_daughter(t); } } @@ -282,7 +282,7 @@ EST_String iname = "Intonation" + rel; for (r = ref.relation(rel, 1)->head(), t = test.relation(rel, 1)->head(); r && t; - r = r->next(), t = t->next()) + r = inext(r), t = inext(t)) { if (legal_daughter(r, iname, valid) && legal_daughter(t, iname, valid)) t->set("i_status", "COR"); @@ -303,7 +303,7 @@ float pos, max, d; EST_String is_name = i_name + s_name; - for (t = utt.relation(i_name, 1)->head(); t; t = t->next()) + for (t = utt.relation(i_name, 1)->head(); t; t = inext(t)) { if (t->in_relation(is_name)) continue; @@ -312,7 +312,7 @@ cout << "here 1\n"; - for (p = 
utt.relation(s_name)->head(); p; p = p->next()) + for (p = utt.relation(s_name)->head(); p; p = inext(p)) { if (t->S("name","0") == "phrase_end") d = fabs(pos - p->end()); @@ -342,7 +342,7 @@ if (lab.load(int_file) != format_ok) EST_error("Couldn't load file %s\n", (const char *) int_file); - for (s = lab.head(); s; s = s->next()) + for (s = lab.head(); s; s = inext(s)) { n = u->relation("Intonation")->append(); merge_features(n, s, 1); @@ -379,7 +379,7 @@ if (lab.load(word_file) != format_ok) EST_error("Couldn't load file %s\n", (const char *) word_file); - for (s = lab.head(); s; s = s->next()) + for (s = lab.head(); s; s = inext(s)) { s->set("start", p_end); p_end = s->F("end"); @@ -408,12 +408,12 @@ if (f0.load(f0_file) != format_ok) EST_error("Couldn't load file %s\n", (const char *) f0_file); - for (s = u->relation("Segment")->head(); s; s = s->next()) + for (s = u->relation("Segment")->head(); s; s = inext(s)) { - prev_mid = s->prev() ? - (prev(s)->F("end") + prev(s)->F("start"))/2.0 : 0.0; - next_mid = s->next() ? - (next(s)->F("end") + next(s)->F("start"))/2.0 : 0.0; + prev_mid = iprev(s) ? + (iprev(s)->F("end") + iprev(s)->F("start"))/2.0 : 0.0; + next_mid = inext(s) ? + (inext(s)->F("end") + inext(s)->F("start"))/2.0 : 0.0; s->set("prev_mid_f0", f0.a(f0.index(prev_mid))); s->set("start_f0", f0.a(f0.index(s->F("start")))); @@ -443,12 +443,12 @@ frame = new EST_FVector; frame->fill(0.0); // special case for first frame. - for (s = u->relation("Segment")->head(); s; s = s->next()) + for (s = u->relation("Segment")->head(); s; s = inext(s)) { - prev_mid = s->prev() ? - (prev(s)->F("end") + prev(s)->F("start"))/2.0 : 0.0; - next_mid = s->next() ? - (next(s)->F("end") + next(s)->F("start"))/2.0 : 0.0; + prev_mid = iprev(s) ? + (iprev(s)->F("end") + iprev(s)->F("start"))/2.0 : 0.0; + next_mid = inext(s) ? 
+ (inext(s)->F("end") + inext(s)->F("start"))/2.0 : 0.0; frame = new EST_FVector; coef.copy_frame_out(coef.index((s->F("end") + s->F("start"))/2.0), @@ -517,7 +517,7 @@ word.relation("SurfaceSyllable")->length() << " surface syllables\n"; - for (s = word.relation("Syllable")->head(); s; s = s->next()) + for (s = word.relation("Syllable")->head(); s; s = inext(s)) { t = s->as_relation("SylStructure"); n = syl_nucleus(t); @@ -565,7 +565,7 @@ utt.create_relation("LabelSegment"); utt.create_relation("Match"); - for (s = lab.head(); s; s = s->next()) + for (s = lab.head(); s; s = inext(s)) { if (!phone_def.present(s->S("name"))) EST_error("Phone %s is not defined in phone set\n", (const char *) @@ -583,7 +583,7 @@ add_times(*utt.relation("Segment"), *utt.relation("LabelSegment"), *utt.relation("Match")); - for (s = utt.relation("Segment")->head(); s; s = s->next()) + for (s = utt.relation("Segment")->head(); s; s = inext(s)) { s->set("target_dur", (s->F("end") - s->F("start"))); s->f_remove("end"); @@ -613,17 +613,17 @@ cout << "Looking at inserting\n"; // Intermeditate silences r = w->I("phon_ref"); - for (s=utt.relation("LabelSegment")->head(); s; s=s->next()) + for (s=utt.relation("LabelSegment")->head(); s; s=inext(s)) { if (r == s->I("ref")) { - if (next(s)->name() == "pau") + if (inext(s)->name() == "pau") { cout << "actually inserting\n"; EST_Item *sil = utt.relation("Segment")->append(); sil->set("name","pau"); sil->set("start",s->F("end")); - sil->set("end",next(s)->F("end")); + sil->set("end",inext(s)->F("end")); } return; } @@ -655,7 +655,7 @@ phone_start = 0.0; - for (s = lab.head(); s; s = s->next()) + for (s = lab.head(); s; s = inext(s)) { if (!phone_def.present(s->S("name"))) EST_error("Phone %s is not defined in phone set\n", (const char *) @@ -671,7 +671,7 @@ // phonemic_trans(*utt.relation("LabelSegment")); /* for (w = utt.relation("Word")->head(); w != 0; w = n) { - n = w->next(); + n = inext(w); w->f_remove("end"); if ((w->f("name") == "sil") || 
(w->f("name") == "pau")) utt.relation("Word")->remove_item(w); @@ -706,7 +706,7 @@ add_silences(utt,0); - for (i = 0, w = utt.relation("Word")->head(); w != 0; w = w->next(), ++i) + for (i = 0, w = utt.relation("Word")->head(); w != 0; w = inext(w), ++i) { word.clear_relations(); word.f.set("max_id", 0); @@ -782,11 +782,11 @@ // if silences aren't wanted we still have to build with them so that // start times before pauses are done properly. if (!siod_get_lval("unisyn_build_with_silences",NULL)) - for (s = next(utt.relation("Segment")->head());s;s = s->next()) - if ((prev(s)->S("name") != "pau") && (prev(s)->S("name") != "sil")) + for (s = inext(utt.relation("Segment")->head());s;s = inext(s)) + if ((iprev(s)->S("name") != "pau") && (iprev(s)->S("name") != "sil")) s->set_function("start", "standard+unisyn_start"); else - utt.relation("Segment")->remove_item(prev(s)); + utt.relation("Segment")->remove_item(iprev(s)); diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/UniSyn_phonology.cc festival-2.5.0/src/modules/UniSyn_phonology/UniSyn_phonology.cc --- festival-2.4~release/src/modules/UniSyn_phonology/UniSyn_phonology.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_phonology/UniSyn_phonology.cc 2017-11-03 12:54:01.000000000 +0000 @@ -125,7 +125,7 @@ cout << "Focusing item " << f << " in relation " << relname << endl; - for (i = 1, n = u->relation(relname)->head(); n; n = n->next(), ++i) + for (i = 1, n = u->relation(relname)->head(); n; n = inext(n), ++i) if (i == f) break; @@ -183,7 +183,7 @@ cout << "Footing item " << f << endl; - for (i = 1, n = u->relation("Syllable")->head(); n; n = n->next(), ++i) + for (i = 1, n = u->relation("Syllable")->head(); n; n = inext(n), ++i) if (i == f) break; @@ -250,7 +250,7 @@ u->create_relation(new_tree); - for (m = u->relation(first_tree)->head(); m; m = m->next()) + for (m = u->relation(first_tree)->head(); m; m = inext(m)) { p = u->relation(new_tree)->append(m); extend_tree(m, p, 
terminal, second_tree); @@ -265,7 +265,7 @@ n->set(keep, 1); - for (EST_Item *p = daughter1(n); p; p = p->next()) + for (EST_Item *p = daughter1(n); p; p = inext(p)) add_keep_nodes(p, keep); } @@ -278,7 +278,7 @@ for (EST_Item *s = daughter1(p); s; s = m) { - m = s->next(); + m = inext(s); if (s != n) s->unref_all(); } @@ -308,7 +308,7 @@ for (s = new_utt->relation(get_c_string(l_relation))->head(); s; s = m) { - m = s->next(); + m = inext(s); if (s != n) s->unref_all(); } @@ -320,21 +320,21 @@ for (s = new_utt->relation("Segment")->head(); s; s = m) { - m = s->next(); + m = inext(s); if (!s->f_present("keep")) s->unref_all(); } for (s = new_utt->relation("Syllable")->head(); s; s = m) { - m = s->next(); + m = inext(s); if (!s->f_present("keep")) s->unref_all(); } for (s = new_utt->relation("Word")->head(); s; s = m) { - m = s->next(); + m = inext(s); if (!s->f_present("keep")) s->unref_all(); } @@ -342,14 +342,14 @@ /* for (s = new_utt->relation("Segment")->head(); s->S("id") != first_leaf(n)->S("id"); s = m) { - m = s->next(); + m = inext(s); cout << "deleting segment :" << s->S("name") << endl; s->unref_all(); } - for (s = next(last_leaf(n)->as_relation("Segment")); s; s = m) + for (s = inext(last_leaf(n)->as_relation("Segment")); s; s = m) { - m = s->next(); + m = inext(s); cout << "deleting segment :" << s->S("name") << endl; s->unref_all(); } @@ -373,13 +373,13 @@ EST_Item *p; EST_Utterance *u = get_c_utt(l_utt); - for (p = u->relation("Word")->head(); p; p = p->next()) + for (p = u->relation("Word")->head(); p; p = inext(p)) p->set("match", p->S("name")); - for (p = u->relation("Segment")->head(); p; p = p->next()) + for (p = u->relation("Segment")->head(); p; p = inext(p)) p->set("match", p->S("name")); - for (p = u->relation("Syllable")->head(); p; p = p->next()) + for (p = u->relation("Syllable")->head(); p; p = inext(p)) add_syllable_name(p, "match"); EST_Features tf; @@ -453,7 +453,7 @@ EST_Utterance *u = get_c_utt(l_utt); EST_Item *s; - for (s = 
u->relation("MetricalTree")->head(); s; s= s->next()) + for (s = u->relation("MetricalTree")->head(); s; s= inext(s)) legal_metrical_tree(s); return l_utt; @@ -617,7 +617,7 @@ load data."); init_subr_4("intonation_diagnostics", FT_intonation_diagnostics, - "(intonation_diagnostics UTT + "(intonation_diagnostics UTT))\n\ Foot nth item in relation."); // semi redundant diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/unisyn_tilt.cc festival-2.5.0/src/modules/UniSyn_phonology/unisyn_tilt.cc --- festival-2.4~release/src/modules/UniSyn_phonology/unisyn_tilt.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_phonology/unisyn_tilt.cc 2017-09-04 15:54:08.000000000 +0000 @@ -48,7 +48,7 @@ EST_Item *s, *t; float pos; - for (t = u.relation("Intonation")->head(); t; t = t->next()) + for (t = u.relation("Intonation")->head(); t; t = inext(t)) { if (t->as_relation("IntonationSyllable")) { diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/us_aux.cc festival-2.5.0/src/modules/UniSyn_phonology/us_aux.cc --- festival-2.4~release/src/modules/UniSyn_phonology/us_aux.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_phonology/us_aux.cc 2017-09-04 15:54:08.000000000 +0000 @@ -56,7 +56,7 @@ dn = daughtern(s); int n = 1; - for (d = d1; d != dn; d = d->next()) + for (d = d1; d != dn; d = inext(d)) ++n; return n; } diff -Nru festival-2.4~release/src/modules/UniSyn_phonology/us_duration.cc festival-2.5.0/src/modules/UniSyn_phonology/us_duration.cc --- festival-2.4~release/src/modules/UniSyn_phonology/us_duration.cc 2010-11-05 14:13:02.000000000 +0000 +++ festival-2.5.0/src/modules/UniSyn_phonology/us_duration.cc 2017-09-04 15:54:08.000000000 +0000 @@ -52,7 +52,7 @@ void clear_feature(EST_Relation &r, const EST_String &name) { - for (EST_Item *p = r.head(); p ; p = p->next()) + for (EST_Item *p = r.head(); p ; p = inext(p)) p->f_remove(name); } @@ -63,7 +63,7 @@ { float prev_end = 0; - for (EST_Item *p = r.head(); p ; p 
= p->next()) + for (EST_Item *p = r.head(); p ; p = inext(p)) { p->set("dur", p->F("end") - prev_end); prev_end = p->F("end"); @@ -76,7 +76,7 @@ end_to_dur(*u.relation(seg_name)); - for (s = u.relation(seg_name)->head(); s; s = s->next()) + for (s = u.relation(seg_name)->head(); s; s = inext(s)) s->set("z_score", phone_z_score(s->f("name"), s->F("dur"))); } @@ -87,7 +87,7 @@ float z, n; - for (s = u.relation(syl_name)->head(); s; s = s->next()) + for (s = u.relation(syl_name)->head(); s; s = inext(s)) { p = s->as_relation(st_name); z = 0.0; @@ -112,7 +112,7 @@ { EST_Item *n, *s; - for (s = u.relation(syl_name)->head(); s; s = s->next()) + for (s = u.relation(syl_name)->head(); s; s = inext(s)) { n = named_daughter(s->as_relation(st_name), "sylval", "Rhyme"); n = daughter1(named_daughter(n, "sylval", "Nucleus")); @@ -131,7 +131,7 @@ clear_feature(*utt->relation(rel), "dur"); clear_feature(*utt->relation(rel), "end"); - for (p = utt->relation(rel)->head(); p ; p = p->next()) + for (p = utt->relation(rel)->head(); p ; p = inext(p)) p->set("dur", met_duration.val(p->f("name")).F("mean")); cout << "dur end\n"; @@ -149,7 +149,7 @@ clear_feature(*utt->relation(rel), "dur"); clear_feature(*utt->relation(rel), "end"); - for (EST_Item *p = utt->relation(rel)->head(); p ; p = p->next()) + for (EST_Item *p = utt->relation(rel)->head(); p ; p = inext(p)) p->set("dur", 0.2); dur_to_end(*utt->relation(rel)); diff -Nru festival-2.4~release/src/scripts/default_voices.sh festival-2.5.0/src/scripts/default_voices.sh --- festival-2.4~release/src/scripts/default_voices.sh 1970-01-01 00:00:00.000000000 +0000 +++ festival-2.5.0/src/scripts/default_voices.sh 2017-11-22 13:45:49.000000000 +0000 @@ -0,0 +1,71 @@ +#!/bin/sh +#####################################################-*-mode:shell-script-*- +## ## +## ## +## Language Technologies Institute ## +## Carnegie Mellon University ## +## Copyright (c) 2017 ## +## All Rights Reserved. 
## +## ## +## Permission is hereby granted, free of charge, to use and distribute ## +## this software and its documentation without restriction, including ## +## without limitation the rights to use, copy, modify, merge, publish, ## +## distribute, sublicense, and/or sell copies of this work, and to ## +## permit persons to whom this work is furnished to do so, subject to ## +## the following conditions: ## +## 1. The code must retain the above copyright notice, this list of ## +## conditions and the following disclaimer. ## +## 2. Any modifications must be clearly marked as such. ## +## 3. Original authors' names are not deleted. ## +## 4. The authors' names are not used to endorse or promote products ## +## derived from this software without specific prior written ## +## permission. ## +## ## +## CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ## +## DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ## +## ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ## +## SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ## +## FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ## +## WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ## +## AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ## +## ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ## +## THIS SOFTWARE. ## +## ## +########################################################################### +## ## +## Download and install the defauly voice and lexicons ## +## ## +########################################################################### + +if [ ! -f ../festival/src/include/festival.h ] +then + echo Not in the right directory: cannot install Festival default voice + echo You should be in the festival source top level directory + echo Where ls -l src/include/festival.h is found + + exit -1 +fi + +if [ ! 
-d packed ] +then + mkdir packed +fi + +( cd packed; + wget http://www.festvox.org/packed/festival/2.4/voices/festvox_kallpc16k.tar.gz; + wget http://www.festvox.org/packed/festival/2.4/festlex_CMU.tar.gz; + wget http://www.festvox.org/packed/festival/2.4/festlex_POSLEX.tar.gz +) + +THISDIR=`pwd` + +( cd ..; + tar zxvf $THISDIR/packed/festvox_kallpc16k.tar.gz; + tar zxvf $THISDIR/packed/festlex_CMU.tar.gz; + tar zxvf $THISDIR/packed/festlex_POSLEX.tar.gz +) + +exit 0 + + + diff -Nru festival-2.4~release/src/scripts/Makefile festival-2.5.0/src/scripts/Makefile --- festival-2.4~release/src/scripts/Makefile 2001-04-04 11:55:20.000000000 +0000 +++ festival-2.5.0/src/scripts/Makefile 2017-09-04 17:17:45.000000000 +0000 @@ -39,7 +39,7 @@ TOP=../.. DIRNAME=src/scripts -SCRIPTS= festival_server.sh festival_server_control.sh +SCRIPTS= festival_server.sh festival_server_control.sh default_voices.sh EXTRA_SCRIPTS = jsapi_example.sh festival_client_java.sh FILES = Makefile shared_setup_sh shared_setup_prl shared_script $(SCRIPTS) $(EXTRA_SCRIPTS) INSTALL = diff -Nru festival-2.4~release/.time-stamp festival-2.5.0/.time-stamp --- festival-2.4~release/.time-stamp 2014-12-20 15:47:28.000000000 +0000 +++ festival-2.5.0/.time-stamp 2017-12-25 15:07:41.000000000 +0000 @@ -1,2 +1,2 @@ -festival 2.4 -Sat Dec 20 10:47:28 EST 2014 +festival 2.5.0 +Mon Dec 25 10:07:41 EST 2017