Skip to content

Commit

Permalink
solution to ticket:133 -- created a new datatype REALISED_ACOUSTPARAM…
Browse files Browse the repository at this point in the history
…S which is produced from

AUDIO data -- the trick is to store the XML tree in the AUDIO mary data object, together with
the actual audio data.

Targets are adapted to store a reference to a MaryXML Element in addition to an utterance Item.
For now, these references are filled as follows: Items in the Segment relation store a reference
to the DOM Element they were created from, so the createTargets() code can easily align the two.
This means that from now on, we can start implementing XML-based target feature
processors, in order to gradually remove the dependency of unit selection code on Utterance
structures.

UnitSelectionSynthesizer, after creating audio, queries the concatenation data objects filled
by the unit concatenator, and updates phone and boundary durations in the XML file accordingly.

Tested with unit selection only. It should work with other WaveformSynthesizers as well, except
that for them REALISED_ACOUSTPARAMS will simply contain the same values as ACOUSTPARAMS.

With the current approach, when REALISED_ACOUSTPARAMS is requested in addition to AUDIO,
all processing steps have to be computed twice; in the future, a caching mechanism could
be used to make this more efficient.



git-svn-id: https://mary.opendfki.de/repos/trunk@827 953a6561-930b-0410-b2a6-db37d1b2ae63
  • Loading branch information
marc1s committed Oct 12, 2007
1 parent 5a42be2 commit eb8807b
Show file tree
Hide file tree
Showing 16 changed files with 305 additions and 35 deletions.
20 changes: 11 additions & 9 deletions java/de/dfki/lt/mary/MaryServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -497,17 +497,19 @@ private boolean handleSynthesisRequest(String inputLine, PrintWriter outputWrite
// Now, the parse is complete.
// this request's id:
id = getID();
// Construct audio format
// Construct audio file format -- even when output is not AUDIO,
// in case we need to pass via audio to get our output type.
AudioFileFormat audioFileFormat = null;
if (audioFileFormatType != null) {
AudioFormat audioFormat = voice.dbAudioFormat();
if (audioFileFormatType.toString().equals("MP3")) {
if (!MaryAudioUtils.canCreateMP3())
throw new UnsupportedAudioFileException("Conversion to MP3 not supported.");
audioFormat = MaryAudioUtils.getMP3AudioFormat();
}
audioFileFormat = new AudioFileFormat(audioFileFormatType, audioFormat, AudioSystem.NOT_SPECIFIED);
if (audioFileFormatType == null) {
audioFileFormatType = AudioFileFormat.Type.WAVE;
}
AudioFormat audioFormat = voice.dbAudioFormat();
if (audioFileFormatType.toString().equals("MP3")) {
if (!MaryAudioUtils.canCreateMP3())
throw new UnsupportedAudioFileException("Conversion to MP3 not supported.");
audioFormat = MaryAudioUtils.getMP3AudioFormat();
}
audioFileFormat = new AudioFileFormat(audioFileFormatType, audioFormat, AudioSystem.NOT_SPECIFIED);

Request request = new Request(inputType, outputType, voice, id, audioFileFormat, streamingAudio);
outputWriter.println(id);
Expand Down
3 changes: 1 addition & 2 deletions java/de/dfki/lt/mary/Request.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,13 @@ public Request(MaryDataType inputType, MaryDataType outputType, Voice defaultVoi
this.outputType = outputType;
this.defaultVoice = defaultVoice;
this.id = id;
this.audioFileFormat = audioFileFormat;
this.streamAudio = streamAudio;
if (outputType == MaryDataType.get("AUDIO")) {
if (audioFileFormat == null)
throw new NullPointerException("audio file format is needed for output type AUDIO");
this.audioFileFormat = audioFileFormat;
this.appendableAudioStream = new AppendableSequenceAudioInputStream(audioFileFormat.getFormat(), new ArrayList());
} else {
this.audioFileFormat = null;
this.appendableAudioStream = null;
}
this.logger = Logger.getLogger("R " + id);
Expand Down
45 changes: 45 additions & 0 deletions java/de/dfki/lt/mary/datatypes/REALISED_ACOUSTPARAMS_Definer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
* Copyright 2000-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package de.dfki.lt.mary.datatypes;

import de.dfki.lt.mary.MaryDataType;
import de.dfki.lt.mary.MaryXML;

/**
 * Registers the {@code REALISED_ACOUSTPARAMS} data type.
 * <p>
 * The registration is performed once, from the static initialiser, through the
 * {@code define(...)} method inherited from {@link MaryDataType}. The type is
 * declared with the {@code MARYXML} traits and the {@link MaryXML#MARYXML} root
 * element name.
 * <p>
 * NOTE(review): the meaning of the individual {@code define(...)} arguments
 * (the two booleans and the {@code null} placeholders) is not visible from this
 * file -- confirm against {@code MaryDataType.define}.
 *
 * @author Marc Schröder
 */
public class REALISED_ACOUSTPARAMS_Definer extends MaryDataType {
    /** Name under which the data type is registered. */
    private static final String TYPE_NAME = "REALISED_ACOUSTPARAMS";

    static {
        define(TYPE_NAME, null, true, true, MARYXML, MaryXML.MARYXML, null, null);
    }
}
28 changes: 16 additions & 12 deletions java/de/dfki/lt/mary/modules/HalfPhoneTargetFeatureLister.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Element;

import com.sun.speech.freetts.Item;
import com.sun.speech.freetts.Relation;

Expand All @@ -53,13 +55,14 @@ public HalfPhoneTargetFeatureLister(MaryDataType outputType, String configEntryP
* @param segs the Segment relation
* @return a list of Target objects -- in this case, halfphone targets
*/
protected List createTargets(Relation segs)
protected List<Target> createTargets(Relation segs)
{
List targets = new ArrayList();
List<Target> targets = new ArrayList<Target>();
for (Item s = segs.getHead(); s != null; s = s.getNext()) {
Element maryxmlElement = (Element) s.getFeatures().getObject("maryxmlElement");
String segName = s.getFeatures().getString("name");
targets.add(new HalfPhoneTarget(segName+"_L", s, true)); // left half
targets.add(new HalfPhoneTarget(segName+"_R", s, false)); // right half
targets.add(new HalfPhoneTarget(segName+"_L", maryxmlElement, s, true)); // left half
targets.add(new HalfPhoneTarget(segName+"_R", maryxmlElement, s, false)); // right half
}
return targets;
}
Expand All @@ -70,19 +73,20 @@ protected List createTargets(Relation segs)
* @param segs the Segment relation
* @return a list of Target objects
*/
protected List createTargetsWithPauses(Relation segs) {
List targets = new ArrayList();
protected List<Target> createTargetsWithPauses(Relation segs) {
List<Target> targets = new ArrayList<Target>();
boolean first = true;
Item s = segs.getHead();
Voice v = FreeTTSVoices.getMaryVoice(s.getUtterance().getVoice());
String silenceSymbol = v.sampa2voice("_");
Target lastTarget = null;
Item lastItem = s;
for (; s != null; s = s.getNext()) {
Element maryxmlElement = (Element) s.getFeatures().getObject("maryxmlElement");
//create next target
String segName = s.getFeatures().getString("name");
Target nextLeftTarget = new HalfPhoneTarget(segName+"_L", s, true);
Target nextRightTarget = new HalfPhoneTarget(segName+"_R", s, false);
Target nextLeftTarget = new HalfPhoneTarget(segName+"_L", maryxmlElement, s, true);
Target nextRightTarget = new HalfPhoneTarget(segName+"_R", maryxmlElement, s, false);
//if first target is not a pause, add one
if (first){
first = false;
Expand All @@ -96,8 +100,8 @@ protected List createTargetsWithPauses(Relation segs) {
newPauseItem.getFeatures().setString("name", silenceSymbol);

//add new targets for item
targets.add(new HalfPhoneTarget(silenceSymbol+"_L", newPauseItem, true));
targets.add(new HalfPhoneTarget(silenceSymbol+"_R", newPauseItem, false));
targets.add(new HalfPhoneTarget(silenceSymbol+"_L", null, newPauseItem, true));
targets.add(new HalfPhoneTarget(silenceSymbol+"_R", null, newPauseItem, false));
}
}
targets.add(nextLeftTarget);
Expand All @@ -113,8 +117,8 @@ protected List createTargetsWithPauses(Relation segs) {
newPauseItem.getFeatures().setString("name", silenceSymbol);

//add new targets for item
targets.add(new HalfPhoneTarget(silenceSymbol+"_L", newPauseItem, true));
targets.add(new HalfPhoneTarget(silenceSymbol+"_R", newPauseItem, false));
targets.add(new HalfPhoneTarget(silenceSymbol+"_L", null, newPauseItem, true));
targets.add(new HalfPhoneTarget(silenceSymbol+"_R", null, newPauseItem, false));
}
return targets;
}
Expand Down
80 changes: 80 additions & 0 deletions java/de/dfki/lt/mary/modules/RealisedAcoustparamsExtractor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Copyright 2000-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package de.dfki.lt.mary.modules;

// DOM classes
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Vector;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;

import de.dfki.lt.mary.MaryData;
import de.dfki.lt.mary.MaryDataType;
import de.dfki.lt.mary.MaryXML;
import de.dfki.lt.mary.modules.synthesis.MBROLAPhoneme;
import de.dfki.lt.mary.modules.synthesis.MbrolaVoice;
import de.dfki.lt.mary.modules.synthesis.Voice;
import de.dfki.lt.mary.util.MaryUtils;
import de.dfki.lt.mary.util.dom.MaryDomUtils;
import de.dfki.lt.mary.util.dom.NameNodeFilter;

/**
 * Turns synthesised {@code AUDIO} data into {@code REALISED_ACOUSTPARAMS} data.
 * <p>
 * This module does not analyse the audio itself: it takes the XML document
 * attached to the incoming data and hands that same document on in a new
 * {@link MaryData} object of this module's output type.
 * <p>
 * NOTE(review): this relies on the producer of the AUDIO data having attached
 * the MaryXML document (and having updated it with the realised values) --
 * confirm against the synthesis modules.
 *
 * @author Marc Schr&ouml;der
 */

public class RealisedAcoustparamsExtractor extends InternalModule
{
    /**
     * Creates the module, declared with the {@code AUDIO} and
     * {@code REALISED_ACOUSTPARAMS} data types (presumably input and output
     * type, in that order -- confirm against {@code InternalModule}).
     */
    public RealisedAcoustparamsExtractor()
    {
        super("Realised acoustparams extractor",
              MaryDataType.get("AUDIO"),
              MaryDataType.get("REALISED_ACOUSTPARAMS")
              );
    }

    /**
     * Wraps the XML document carried by the given data in a new
     * {@link MaryData} object of this module's output type.
     *
     * @param d the data whose attached document is to be passed on
     * @return a new {@link MaryData} of type {@code outputType()} holding the
     *         same document instance as {@code d}
     * @throws IllegalArgumentException if {@code d} carries no document
     * @throws Exception declared for compatibility with the module interface
     */
    public MaryData process(MaryData d)
    throws Exception
    {
        Document doc = d.getDocument();
        if (doc == null) {
            // Fail here with a clear message instead of handing a null
            // document to whoever serialises the result later on.
            throw new IllegalArgumentException(
                "Cannot extract REALISED_ACOUSTPARAMS: input data contains no XML document");
        }
        MaryData result = new MaryData(outputType());
        result.setDocument(doc);
        return result;
    }

}
4 changes: 3 additions & 1 deletion java/de/dfki/lt/mary/modules/Synthesis.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public MaryData process(MaryData d)
throws Exception
{
// We produce audio data, so we expect some helpers in our input:
assert d.getAudioFileFormat() != null;
assert d.getAudioFileFormat() != null : "Audio file format is not set!";
Document doc = d.getDocument();
// As the input may contain multipe voice sections,
// the challenge in this method is to join the audio data
Expand All @@ -135,6 +135,8 @@ public MaryData process(MaryData d)
}

MaryData result = new MaryData(outputType());
// Also remember XML document in "AUDIO" output data, to keep track of phone durations:
result.setDocument(doc);
result.setAudioFileFormat(d.getAudioFileFormat());
if (d.getAudio() != null) {
// This (empty) AppendableSequenceAudioInputStream object allows a
Expand Down
2 changes: 2 additions & 0 deletions java/de/dfki/lt/mary/modules/XML2UttBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ protected String addOneElement(Utterance utterance, Element element,
}
float end = prevEnd + dur * 0.001f;
Item segItem = segmentRelation.appendItem();
segItem.getFeatures().setObject("maryxmlElement", element);
// Silence symbol in voice-specific phonetic alphabet:
String silence = maryVoice.sampa2voice("_");
assert silence != null;
Expand Down Expand Up @@ -567,6 +568,7 @@ protected Item createSylStructure(Item wordItem, Element t, boolean createTarget
Element segElement = null;
while ((segElement = (Element) segIt.nextNode()) != null) {
Item segItem = segRelation.appendItem();
segItem.getFeatures().setObject("maryxmlElement", segElement);
sylStructSylItem.addDaughter(segItem);
String sampa = segElement.getAttribute("p");
assert !sampa.equals("");
Expand Down
2 changes: 1 addition & 1 deletion java/de/dfki/lt/mary/unitselection/DiphoneTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class DiphoneTarget extends Target {

public DiphoneTarget(HalfPhoneTarget left, HalfPhoneTarget right)
{
super(null, null);
super(null, null, null);
this.name = left.name.substring(0, left.name.lastIndexOf("_"))
+ "-" + right.name.substring(0, right.name.lastIndexOf("_"));
assert left.isRightHalf(); // the left half of this diphone must be the right half of a phone
Expand Down
11 changes: 7 additions & 4 deletions java/de/dfki/lt/mary/unitselection/DiphoneUnitSelector.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Element;

import com.sun.speech.freetts.Item;
import com.sun.speech.freetts.ItemContents;
import com.sun.speech.freetts.Relation;
Expand Down Expand Up @@ -64,12 +66,13 @@ protected List<Target> createTargets(Relation segs)

Item initialSilence = new Item(segs, new ItemContents());
initialSilence.getFeatures().setFloat("end", 0.001f);
HalfPhoneTarget prev = new HalfPhoneTarget(silenceSymbol+"_R", initialSilence, false);
HalfPhoneTarget prev = new HalfPhoneTarget(silenceSymbol+"_R", null, initialSilence, false);
for (Item s = segs.getHead(); s != null; s = s.getNext()) {
Element maryxmlElement = (Element) s.getFeatures().getObject("maryxmlElement");
String segName = s.getFeatures().getString("name");
String sampa = FreeTTSVoices.getMaryVoice(s.getUtterance().getVoice()).voice2sampa(segName);
HalfPhoneTarget leftHalfPhone = new HalfPhoneTarget(sampa+"_L", s, true); // left half
HalfPhoneTarget rightHalfPhone = new HalfPhoneTarget(sampa+"_R", s, false); // right half
HalfPhoneTarget leftHalfPhone = new HalfPhoneTarget(sampa+"_L", maryxmlElement, s, true); // left half
HalfPhoneTarget rightHalfPhone = new HalfPhoneTarget(sampa+"_R", maryxmlElement, s, false); // right half
targets.add(new DiphoneTarget(prev, leftHalfPhone));
prev = rightHalfPhone;
}
Expand All @@ -79,7 +82,7 @@ protected List<Target> createTargets(Relation segs)
float prevEnd = prev.getItem().getFeatures().getFloat("end");
Item finalSilence = new Item(segs, new ItemContents());
finalSilence.getFeatures().setFloat("end", prevEnd+0.001f);
HalfPhoneTarget silence = new HalfPhoneTarget(silenceSymbol+"_L", finalSilence, true);
HalfPhoneTarget silence = new HalfPhoneTarget(silenceSymbol+"_L", null, finalSilence, true);
targets.add(new DiphoneTarget(prev, silence));
}
return targets;
Expand Down
6 changes: 4 additions & 2 deletions java/de/dfki/lt/mary/unitselection/HalfPhoneTarget.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
*/
package de.dfki.lt.mary.unitselection;

import org.w3c.dom.Element;

import com.sun.speech.freetts.Item;
import com.sun.speech.freetts.Relation;

Expand All @@ -47,9 +49,9 @@ public class HalfPhoneTarget extends Target
* @param isLeftHalf true if this target represents the left half
* of the phone, false if it represents the right half of the phone
*/
public HalfPhoneTarget(String name, Item item, boolean isLeftHalf)
public HalfPhoneTarget(String name, Element maryxmlElement, Item item, boolean isLeftHalf)
{
super(name, item);
super(name, maryxmlElement, item);
this.isLeftHalf = isLeftHalf;
}

Expand Down
7 changes: 5 additions & 2 deletions java/de/dfki/lt/mary/unitselection/HalfPhoneUnitSelector.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Element;

import com.sun.speech.freetts.Item;
import com.sun.speech.freetts.Relation;

Expand Down Expand Up @@ -59,10 +61,11 @@ protected List<Target> createTargets(Relation segs)
{
List<Target> targets = new ArrayList<Target>();
for (Item s = segs.getHead(); s != null; s = s.getNext()) {
Element maryxmlElement = (Element) s.getFeatures().getObject("maryxmlElement");
String segName = s.getFeatures().getString("name");
String sampa = FreeTTSVoices.getMaryVoice(s.getUtterance().getVoice()).voice2sampa(segName);
targets.add(new HalfPhoneTarget(sampa+"_L", s, true)); // left half
targets.add(new HalfPhoneTarget(sampa+"_R", s, false)); // right half
targets.add(new HalfPhoneTarget(sampa+"_L", maryxmlElement, s, true)); // left half
targets.add(new HalfPhoneTarget(sampa+"_R", maryxmlElement, s, false)); // right half
}
return targets;
}
Expand Down
Loading

0 comments on commit eb8807b

Please sign in to comment.