Skip to content

Commit

Permalink
embr
Browse files Browse the repository at this point in the history
  • Loading branch information
guoli-ye committed Jun 20, 2018
1 parent 830b6f9 commit 4018e1e
Show file tree
Hide file tree
Showing 65 changed files with 30,230 additions and 455 deletions.
1 change: 1 addition & 0 deletions CNTK.sln
Original file line number Diff line number Diff line change
Expand Up @@ -2216,6 +2216,7 @@ Global
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Debug_CpuOnly|x64.ActiveCfg = Debug|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Debug_UWP|x64.ActiveCfg = Debug|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Debug|x64.ActiveCfg = Debug|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Debug|x64.Build.0 = Debug|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Release_CpuOnly|x64.ActiveCfg = Release|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Release_NoOpt|x64.ActiveCfg = Release|Any CPU
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}.Release_UWP|x64.ActiveCfg = Release|Any CPU
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,9 @@ HTKMLFREADER_SRC =\
$(SOURCEDIR)/Readers/HTKMLFReader/DataWriterLocal.cpp \
$(SOURCEDIR)/Readers/HTKMLFReader/HTKMLFReader.cpp \
$(SOURCEDIR)/Readers/HTKMLFReader/HTKMLFWriter.cpp \
# $(SOURCEDIR)/Common/File.cpp \
# $(SOURCEDIR)/Common/fileutil.cpp \
# $(SOURCEDIR)/Common/ExceptionWithCallStack.cpp \
HTKMLFREADER_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(HTKMLFREADER_SRC))
Expand Down
Binary file not shown.
6 changes: 3 additions & 3 deletions Source/CNTK/BrainScript/Doc/Notes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
//L3 = SBFF(L2,hiddenDim,hiddenDim)
//CE = SMBFF(L3,labelDim,hiddenDim,myLabels,tag=Criteria)
//Err = ClassificationError(myLabels,CE.BFF.FF.P,tag=Eval)
//logPrior = LogPrior(myLabels)
//logPrior = LogPrior(myLabels)
//ScaledLogLikelihood=Minus(CE.BFF.FF.P,logPrior,tag=Output)

// new:
Expand Down Expand Up @@ -282,7 +282,7 @@ TIMIT_TrainSimple = new TrainAction [ // new: added TrainAction; t
Err = ClassificationError(myLabels, outZ)

// define output node for decoding
logPrior = LogPrior(myLabels)
logPrior = LogPrior(myLabels)
ScaledLogLikelihood = outZ - logPrior // before: Minus(CE.BFF.FF.P,logPrior,tag=Output)
]
]
Expand Down Expand Up @@ -395,6 +395,6 @@ network = new NDL [
Err = ClassificationError(myLabels, outZ)

// define output node for decoding
logPrior = LogPrior(myLabels)
logPrior = LogPrior(myLabels)
ScaledLogLikelihood = outZ - logPrior // before: Minus(CE.BFF.FF.P,logPrior,tag=Output)
]
5 changes: 2 additions & 3 deletions Source/CNTK/CNTK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ static void PrintBanner(int argc, wchar_t* argv[], const string& timestamp)
fprintf(stderr, "%s %.6s, ", _BUILDBRANCH_, _BUILDSHA1_);
#endif
fprintf(stderr, "%s %s", __DATE__, __TIME__); // build time
fprintf(stderr, ") at %s\n\n", timestamp.c_str());
fprintf(stderr, ") on %s at %s\n\n", GetHostName().c_str(), timestamp.c_str());
for (int i = 0; i < argc; i++)
fprintf(stderr, "%*s%ls", i > 0 ? 2 : 0, "", argv[i]); // use 2 spaces for better visual separability
fprintf(stderr, "\n");
Expand All @@ -617,8 +617,7 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
{
std::string timestamp = TimeDateStamp();
PrintBanner(argc, argv, timestamp);

ConfigParameters config;
ConfigParameters config;
std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want

int traceLevel = config(L"traceLevel", 0);
Expand Down
22 changes: 11 additions & 11 deletions Source/CNTK/NdlScript.txt
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,11 @@ ndlMacroUseCNNAuto=[
]

ndlRnnNetwork=[
#define basic i/o
featDim=1845
labelDim=183
hiddenDim=2048
features=Input(featDim, tag=feature)
#define basic i/o
featDim=1845
labelDim=183
hiddenDim=2048
features=Input(featDim, tag=feature)
labels=Input(labelDim, tag=label)

MeanVarNorm(x)=[
Expand All @@ -502,9 +502,9 @@ ndlRnnNetwork=[
]

# define network
featNorm = MeanVarNorm(features)
featNorm = MeanVarNorm(features)
W0 = Parameter(hiddenDim, featDim)
L1 = Times(W0,featNorm)
L1 = Times(W0,featNorm)

W = Parameter(hiddenDim, hiddenDim)

Expand All @@ -515,8 +515,8 @@ ndlRnnNetwork=[
Output = Times(W2, Dout)
criterion = CrossEntropyWithSoftmax(labels, Output, tag=Criteria)

#CE = SMBFF(Dout,labelDim,hiddenDim,labels,tag=Criteria)
#Err = ErrorPrediction(labels,CE.BFF.FF.P,tag=Eval)
#CE = SMBFF(Dout,labelDim,hiddenDim,labels,tag=Criteria)
#Err = ErrorPrediction(labels,CE.BFF.FF.P,tag=Eval)

LogPrior(labels)
{
Expand All @@ -525,8 +525,8 @@ ndlRnnNetwork=[
}

# define output (scaled loglikelihood)
logPrior = LogPrior(labels)
#ScaledLogLikelihood=Minus(CE.BFF.FF.P,logPrior,tag=Output)
logPrior = LogPrior(labels)
#ScaledLogLikelihood=Minus(CE.BFF.FF.P,logPrior,tag=Output)
# rootNodes defined here temporarily so we pass
OutputNodes=(criterion)
EvalNodes=(criterion)
Expand Down
1 change: 1 addition & 0 deletions Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
<Keyword>Win32Proj</Keyword>
<RootNamespace>CNTKv2LibraryDll</RootNamespace>
<ProjectName>CNTKv2LibraryDll</ProjectName>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
Expand Down
16 changes: 14 additions & 2 deletions Source/Common/DataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,11 +274,17 @@ bool DataReader::GetMinibatch(StreamMinibatchInputs& matrices)
// uids - labels stored in a size_t vector instead of an ElemType matrix
// boundary - phone boundaries
// returns - true if there are more minibatches, false if no more minibatches remain
// wids, nws - additional output vectors forwarded unchanged to every reader
//             (presumably word ids and per-utterance word counts - TODO confirm against the reader implementation)
// The call is forwarded to each reader listed in m_ioNames; the individual results are AND-ed
// with '&=' (no short-circuit), so every reader is invoked even after one has returned false.
bool DataReader::GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticepair>>& latticeinput, vector<size_t>& uids, vector<size_t>& wids, vector<short>& nws, vector<size_t>& boundaries, vector<size_t>& extrauttmap)
{
    bool bRet = true;
    for (size_t i = 0; i < m_ioNames.size(); i++)
        bRet &= m_dataReaders[m_ioNames[i]]->GetMinibatch4SE(latticeinput, uids, wids, nws, boundaries, extrauttmap);
    return bRet;
}

Expand All @@ -288,8 +294,14 @@ bool DataReader::GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::lattice
// Forwards the GetHmmData() request for 'hmm' to every reader named in m_ioNames.
// Returns true only if all readers returned true. The results are combined with '&='
// (no short-circuit), so each reader is called regardless of earlier failures.
bool DataReader::GetHmmData(msra::asr::simplesenonehmm* hmm)
{
    bool allSucceeded = true;
    for (const auto& ioName : m_ioNames)
    {
        const bool readerSucceeded = m_dataReaders[ioName]->GetHmmData(hmm);
        allSucceeded &= readerSucceeded;
    }
    return allSucceeded;
}

Expand Down
11 changes: 9 additions & 2 deletions Source/Common/Include/DataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,10 @@ class DATAREADER_API IDataReader
}

virtual bool GetMinibatch(StreamMinibatchInputs& matrices) = 0;
// Default implementation of the sequence-training minibatch getter: readers that do not
// override it end up in NOT_IMPLEMENTED. All parameters are intentionally unnamed because
// the default body does not use them.
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticepair>>& /*latticeinput*/, vector<size_t>& /*uids*/, vector<size_t>& /*wids*/, vector<short>& /*nws*/, vector<size_t>& /*boundaries*/, vector<size_t>& /*extrauttmap*/)
{
    NOT_IMPLEMENTED;
};
Expand Down Expand Up @@ -444,7 +447,11 @@ class DataReader : public IDataReader, protected Plugin, public ScriptableObject
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticepair>>& latticeinput, vector<size_t>& uids, vector<size_t>& boundaries, vector<size_t>& extrauttmap);
/* guoye: start */
// virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticepair>>& latticeinput, vector<size_t>& uids, vector<size_t>& boundaries, vector<size_t>& extrauttmap);
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticepair>>& latticeinput, vector<size_t>& uids, vector<size_t>& wids, vector<short>& nws, vector<size_t>& boundaries, vector<size_t>& extrauttmap);

/* guoye: end */
virtual bool GetHmmData(msra::asr::simplesenonehmm* hmm);

size_t GetNumParallelSequencesForFixingBPTTMode();
Expand Down
174 changes: 172 additions & 2 deletions Source/Common/Include/fileutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@
#include <fcntl.h>

#define FCLOSE_SUCCESS 0
/* guoye: start */
/*
#include "basetypes.h" //for attemp()
#include "ProgressTracing.h"
#include <unistd.h>
#include <glob.h>
#include <dirent.h>
#include <sys/sendfile.h>
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include <memory>
#include <cwctype>
*/
// using namespace Microsoft::MSR::CNTK;
/* guoye: end */

// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
Expand Down Expand Up @@ -77,6 +93,7 @@ void freadOrDie(_T& data, size_t num, FILE* f) // template for std::vector<>
freadOrDie(&data[0], sizeof(data[0]), data.size(), f);
}


#ifdef _WIN32
template <class _T>
void freadOrDie(_T& data, int num, const HANDLE f) // template for std::vector<>
Expand Down Expand Up @@ -229,11 +246,129 @@ void fputstring(FILE* f, const wchar_t*);
void fputstring(FILE* f, const std::wstring&);

template <class CHAR>
CHAR* fgetline(FILE* f, CHAR* buf, int size);
CHAR* fgetline(FILE* f, CHAR* buf, int size)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
/* guoye: start */
// fprintf(stderr, "\n fileutil.cpp: fgetline: debug 0\n");
/* guoye: end */
CHAR* p = fgets(buf, size, f);
/* guoye: start */
// fprintf(stderr, "\n fileutil.cpp: fgetline: debug 1\n");
/* guoye: end */
if (p == NULL) // EOF reached: next time feof() = true
{
if (ferror(f))
RuntimeError("error reading line: %s", strerror(errno));
buf[0] = 0;
return buf;
}
size_t n = strnlen(p, size);

// check for buffer overflow

if (n >= (size_t)size - 1)
{
/* guoye: start */
// basic_string<CHAR> example(p, n < 100 ? n : 100);
std::basic_string<CHAR> example(p, n < 100 ? n : 100);
/* guoye: end */
uint64_t filepos = fgetpos(f); // (for error message only)
RuntimeError("input line too long at file offset %d (max. %d characters allowed) [%s ...]", (int)filepos, (int)size - 1, msra::strfun::utf8(example).c_str());
}

// remove newline at end

if (n > 0 && p[n - 1] == '\n') // UNIX and Windows style
{
n--;
p[n] = 0;
if (n > 0 && p[n - 1] == '\r') // Windows style
{
n--;
p[n] = 0;
}
}
else if (n > 0 && p[n - 1] == '\r') // Mac style
{
n--;
p[n] = 0;
}

return buf;
}

// fgetlinew(): wide-character counterpart of fgetline(); added because the narrow version
// does not handle wchar_t buffers.
//  f    - stream to read from
//  buf  - destination buffer (CHAR is expected to be wchar_t: fgetws()/wcsnlen() are used)
//  size - capacity of 'buf' in characters (passed on to fgetws(), so it includes the terminator)
// Returns 'buf' (always). When fgetws() yields no data (end of file), 'buf' is set to the empty string.
// A read error, or a line that fills the buffer completely, is reported through RuntimeError().
// Recognized line endings: L"\n", L"\r\n" and a lone trailing L"\r".
template <class CHAR>
CHAR* fgetlinew(FILE* f, CHAR* buf, int size)
{
    // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    // Use the wide-character reader explicitly: the standard fgets() only accepts char buffers,
    // while the rest of this function (wcsnlen, L'...' literals) already assumes wchar_t.
    CHAR* p = fgetws(buf, size, f);
    if (p == NULL) // EOF reached: next time feof() = true
    {
        if (ferror(f))
            RuntimeError("error reading line: %s", strerror(errno));
        buf[0] = L'\0';
        return buf;
    }
    size_t n = wcsnlen(p, size);

    // check for buffer overflow: a completely filled buffer means the line may have been cut off
    if (n >= (size_t) size - 1)
    {
        // quote at most the first 100 characters of the offending line
        std::basic_string<CHAR> example(p, n < 100 ? n : 100);
        uint64_t filepos = fgetpos(f); // (for error message only)
        RuntimeError("input line too long at file offset %d (max. %d characters allowed) [%s ...]", (int) filepos, (int) size - 1, msra::strfun::utf8(example).c_str());
    }

    // remove newline at end
    if (n > 0 && p[n - 1] == L'\n') // UNIX and Windows style
    {
        p[--n] = L'\0';
        if (n > 0 && p[n - 1] == L'\r') // Windows style
            p[--n] = L'\0';
    }
    else if (n > 0 && p[n - 1] == L'\r') // Mac style
    {
        p[--n] = L'\0';
    }

    return buf;
}

// Convenience overload of fgetlinew() for fixed-size character arrays: the buffer capacity
// is deduced from the array type and handed to the (f, buf, size) overload.
template <class CHAR, size_t n>
CHAR* fgetlinew(FILE* f, CHAR(&buf)[n])
{
    const int capacity = static_cast<int>(n); // the three-argument overload takes the size as int
    return fgetlinew(f, buf, capacity);
}

/* guoye: end */
// Convenience overload of fgetline() for fixed-size character arrays: the buffer capacity
// is deduced from the array type and handed to the (f, buf, size) overload.
template <class CHAR, size_t n>
CHAR* fgetline(FILE* f, CHAR(&buf)[n])
{
    const int capacity = static_cast<int>(n); // the three-argument overload takes the size as int
    return fgetline(f, buf, capacity);
}
std::string fgetline(FILE* f);
std::wstring fgetlinew(FILE* f);
Expand Down Expand Up @@ -902,14 +1037,49 @@ static inline String& trim(String& s)
{
return ltrim(rtrim(s));
}
/* guoye: start */

// SplitString(): split 'str' into tokens.
//  str - string to split
//  sep - set of delimiter characters; every character contained in 'sep' ends a token
//        (the characters are matched individually via find_first_of(), not as one multi-character separator)
// Returns the tokens in order of appearance. Empty tokens (leading, trailing or adjacent
// delimiters) are dropped, so an empty or delimiter-only input yields an empty vector.
// The definition lives in the header because it is a template.
template<class String>
std::vector<String> SplitString(const String& str, const String& sep)
{
    std::vector<String> tokens;
    size_t tokenStart = 0;
    for (;;)
    {
        const size_t sepPos = str.find_first_of(sep, tokenStart);
        // the last token runs up to the end of the string
        const size_t tokenEnd = (sepPos == String::npos) ? str.length() : sepPos;
        if (tokenEnd > tokenStart) // skip empty tokens
            tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        if (sepPos == String::npos)
            break;
        tokenStart = sepPos + 1; // continue behind the delimiter
    }
    return tokens;
}
/* guoye: end */
// Overload taking the delimiter set as a C-style string: 'sep' is converted to a String
// and the call is forwarded to SplitString(const String&, const String&).
template<class String, class Char>
std::vector<String> SplitString(const String& str, const Char* sep)
{
    const String delimiters(sep);
    return SplitString(str, delimiters);
}

std::wstring s2ws(const std::string& str);

std::string ws2s(const std::wstring& wstr);


/* guoye: start */
// #include "../fileutil.cpp"
/* guoye: end */
#endif // _FILEUTIL_
Loading

0 comments on commit 4018e1e

Please sign in to comment.