-
Notifications
You must be signed in to change notification settings - Fork 587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds FeatureExtractor and corresponding test class; #37
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
package edu.cmu.sphinx.api; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.logging.Logger; | ||
|
||
import edu.cmu.sphinx.frontend.Data; | ||
import edu.cmu.sphinx.frontend.DataEndSignal; | ||
import edu.cmu.sphinx.frontend.DataProcessingException; | ||
import edu.cmu.sphinx.frontend.DoubleData; | ||
import edu.cmu.sphinx.frontend.FloatData; | ||
import edu.cmu.sphinx.frontend.FrontEnd; | ||
import edu.cmu.sphinx.frontend.util.StreamDataSource; | ||
import edu.cmu.sphinx.util.props.ConfigurationManager; | ||
|
||
/** | ||
* Extracts features from input stream | ||
* | ||
* @author Vladisav Jelisavcic | ||
* | ||
*/ | ||
class FeatureExtractor { | ||
private FrontEnd frontEnd; | ||
private StreamDataSource audioSource; | ||
private int featureLength = -1; | ||
|
||
/** The logger for this class */ | ||
private static final Logger logger = Logger | ||
.getLogger("edu.cmu.sphinx.api.FeatureExtractor"); | ||
|
||
/**
 * Constructs a FeatureExtractor.
 *
 * Looks up the front end registered under {@code frontEndName} and the
 * component registered under {@code "streamDataSource"} in the supplied
 * configuration, then attaches {@code inputStream} to that data source.
 *
 * @param cm
 *            the configuration manager
 * @param frontEndName
 *            the name for the frontEnd to be used
 * @param inputStream
 *            data stream
 *
 * @throws IOException if error occurred
 * @throws RuntimeException if the configuration has no component named
 *             {@code frontEndName} or no {@code "streamDataSource"}
 */
public FeatureExtractor(ConfigurationManager cm, String frontEndName, InputStream inputStream)
/* There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We do not expose configuration managers in high-level API, it is better to make all those details private. Like I wrote in the original prototype, it is better to support different sample rates. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, but if I understand correctly, then the user can't choose the FrontEnd Thanks, On Thu, Sep 10, 2015 at 1:57 PM, Nickolay V. Shmyrev < */
/* There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, configuration must be hidden. Our high-level API hides default.config.xml in the same way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, I fixed the issues you mentioned, can you please review changes I made? Thanks, On Sat, Sep 12, 2015 at 11:08 AM, Nickolay V. Shmyrev < */
        throws IOException {

    // Look the front end up once and reuse the result instead of querying
    // the configuration a second time after the null check.
    frontEnd = (FrontEnd) cm.lookup(frontEndName);
    if (frontEnd == null) {
        throw new RuntimeException("No such frontend: " + frontEndName);
    }

    // Fail with an explicit message rather than a bare NullPointerException
    // when the configuration does not define the stream data source.
    audioSource = (StreamDataSource) cm.lookup("streamDataSource");
    if (audioSource == null) {
        throw new RuntimeException("No such data source: streamDataSource");
    }
    audioSource.setInputStream(inputStream);
}
|
||
/** | ||
* Extracts all features from the supplied InputStream. | ||
* | ||
* @return float[][] when called first time, null otherwise | ||
* | ||
* @throws DataProcessinException if error occurred | ||
*/ | ||
public float[][] getAllFeatures() throws DataProcessingException { | ||
List<float[]> featureList = new ArrayList<float[]>(); | ||
|
||
Data feature = frontEnd.getData(); | ||
if(feature == null) | ||
return null; | ||
|
||
while (!(feature instanceof DataEndSignal)) { | ||
if (feature instanceof DoubleData) { | ||
double[] featureData = ((DoubleData) feature).getValues(); | ||
if (featureLength < 0) { | ||
featureLength = featureData.length; | ||
logger.info("Feature length: " + featureLength); | ||
} | ||
float[] convertedData = new float[featureData.length]; | ||
for (int i = 0; i < featureData.length; i++) { | ||
convertedData[i] = (float) featureData[i]; | ||
} | ||
featureList.add(convertedData); | ||
} else if (feature instanceof FloatData) { | ||
float[] featureData = ((FloatData) feature).getValues(); | ||
if (featureLength < 0) { | ||
featureLength = featureData.length; | ||
logger.info("Feature length: " + featureLength); | ||
} | ||
featureList.add(featureData); | ||
} | ||
feature = frontEnd.getData(); | ||
} | ||
|
||
float[][] allFeatures = new float[featureList.size()][]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is ok to return feature list as list, not necessary an array. It's better to make sure code is properly indented too. |
||
for(int i=0;i<featureList.size();i++){ | ||
allFeatures[i] = featureList.get(i); | ||
} | ||
return allFeatures; | ||
|
||
} | ||
|
||
|
||
/** | ||
* Extracts a single feature frame from the supplied InputStream. | ||
* | ||
* @return float[] or null if end of stream reached | ||
* | ||
* @throws DataProcessinException if error occurred | ||
*/ | ||
public float[] getNextFeatureFrame() throws DataProcessingException{ | ||
Data feature = frontEnd.getData(); | ||
if(feature == null) | ||
return null; | ||
|
||
while (!(feature instanceof DoubleData || feature instanceof FloatData)) { | ||
feature = frontEnd.getData(); | ||
if(feature == null) | ||
return null; | ||
} | ||
|
||
|
||
if (feature instanceof DoubleData) { | ||
double[] featureData = ((DoubleData) feature).getValues(); | ||
if (featureLength < 0) { | ||
featureLength = featureData.length; | ||
logger.info("Feature length: " + featureLength); | ||
} | ||
float[] convertedData = new float[featureData.length]; | ||
for (int i = 0; i < featureData.length; i++) { | ||
convertedData[i] = (float) featureData[i]; | ||
} | ||
return convertedData; | ||
} else if (feature instanceof FloatData) { | ||
float[] featureData = ((FloatData) feature).getValues(); | ||
if (featureLength < 0) { | ||
featureLength = featureData.length; | ||
logger.info("Feature length: " + featureLength); | ||
} | ||
return featureData; | ||
} | ||
return null; | ||
|
||
} | ||
|
||
|
||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No newline at the end of file |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/** | ||
* | ||
*/ | ||
package edu.cmu.sphinx.api; | ||
|
||
import static org.testng.AssertJUnit.assertEquals; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.net.URL; | ||
import java.util.Scanner; | ||
|
||
import org.testng.annotations.AfterTest; | ||
import org.testng.annotations.BeforeTest; | ||
import org.testng.annotations.Test; | ||
|
||
import edu.cmu.sphinx.util.props.ConfigurationManager; | ||
|
||
|
||
/** | ||
* @author Vladisav Jelisavcic | ||
* | ||
*/ | ||
public class FeatureExtractorTest { | ||
|
||
static final String FRONTEND_NAME = "cepstraFrontEnd"; | ||
private float[] features; | ||
|
||
@BeforeTest | ||
public void setUp(){ | ||
InputStream asciiStream = FeatureExtractorTest.class | ||
.getResourceAsStream("/edu/cmu/sphinx/api/10001-90210-01803.features"); | ||
Scanner sc = new Scanner(asciiStream); | ||
int numDataPoints = sc.nextInt(); | ||
System.out.println(numDataPoints); | ||
|
||
features = new float[numDataPoints]; | ||
int i = 0; | ||
while(sc.hasNextFloat()){ | ||
features[i++] = sc.nextFloat(); | ||
} | ||
sc.close(); | ||
|
||
} | ||
|
||
@AfterTest | ||
public void tearDown(){ | ||
|
||
} | ||
|
||
/** | ||
* Test method for {@link edu.cmu.sphinx.api.FeatureExtractor#getAllFeatures()}. | ||
* @throws IOException | ||
*/ | ||
@Test | ||
public void testGetAllFeatures() throws IOException { | ||
URL url = FeatureExtractorTest.class | ||
.getResource("/edu/cmu/sphinx/tools/feature/frontend.config.xml"); | ||
|
||
ConfigurationManager cm = new ConfigurationManager(url); | ||
|
||
InputStream audioStream = FeatureExtractorTest.class | ||
.getResourceAsStream("/edu/cmu/sphinx/tools/bandwidth/10001-90210-01803.wav"); | ||
|
||
FeatureExtractor fe = new FeatureExtractor(cm,FRONTEND_NAME,audioStream); | ||
float[][] data = fe.getAllFeatures(); | ||
|
||
int numSamples = data.length; | ||
int numFeatures = data[0].length; | ||
|
||
int numDataPoints = numSamples * numFeatures; | ||
|
||
assertEquals(features.length,numDataPoints); // check if all data points are loaded | ||
for(int i=0;i<numSamples;i++){ | ||
for(int j=0;j<numFeatures;j++){ | ||
assertEquals(features[i*numFeatures+j],data[i][j]); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Test method for {@link edu.cmu.sphinx.api.FeatureExtractor#getNextFeatureFrame()}. | ||
* @throws IOException | ||
*/ | ||
@Test | ||
public void testGetNextFeatureFrame() throws IOException { | ||
URL url = FeatureExtractorTest.class | ||
.getResource("/edu/cmu/sphinx/tools/feature/frontend.config.xml"); | ||
|
||
ConfigurationManager cm = new ConfigurationManager(url); | ||
|
||
InputStream audioStream = FeatureExtractorTest.class | ||
.getResourceAsStream("/edu/cmu/sphinx/tools/bandwidth/10001-90210-01803.wav"); | ||
|
||
FeatureExtractor fe = new FeatureExtractor(cm,FRONTEND_NAME,audioStream); | ||
|
||
int numDataPoints = 0; | ||
float[] data; | ||
while((data = fe.getNextFeatureFrame()) != null){ | ||
for(int i=0;i<data.length;i++){ | ||
assertEquals(features[i+numDataPoints],data[i]); | ||
} | ||
numDataPoints += data.length; | ||
} | ||
|
||
assertEquals(features.length,numDataPoints); // check if all data points are loaded | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Space missing here |
||
} | ||
|
||
} |
Large diffs are not rendered by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sphinx4 uses its own logging framework; it is better to stick to that one. In the high-level API we do not use logging.