Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow compilation with Visual Studio; add flag for output file instead of stdout; document flags in usage arg; allow forced align to produce same output format as train+align. #34

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
find_package(SparseHash)
if(SPARSEHASH_FOUND)
add_definitions(-DHAVE_SPARSEHASH)
include_directories(${SPARSEHASH_INCLUDE_DIR})
endif(SPARSEHASH_FOUND)

find_package(OpenMP QUIET)
find_package(OpenMP)
if (OPENMP_FOUND)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif(OPENMP_FOUND)
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ These can be symmetrized using the included `atools` command using a variety of

./atools -i forward.align -j reverse.align -c grow-diag-final-and


## Compiling for Windows

To build 64-bit binaries with libsparsehash on Windows using Visual Studio 2017, the following recipe works:


cd d:\src
git clone https://github.com/clab/fast_align.git
git clone https://github.com/sparsehash/sparsehash.git
cd d:\src\fast_align
mkdir build64
cd build64
cmake -G "Visual Studio 15 2017 Win64" -D SPARSEHASH_INCLUDE_DIR=d:\src\sparsehash\src ..
cmake --build . --config Release


## Output

`fast_align` produces outputs in the widely-used `i-j` “Pharaoh format,” where a pair `i-j` indicates that the <i>i</i>th word (zero-indexed) of the left language (by convention, the *source* language) is aligned to the <i>j</i>th word of the right sentence (by convention, the *target* language). For example, a good alignment of the above German–English corpus would be:
Expand Down
15 changes: 12 additions & 3 deletions src/atools.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
#include <queue>
#include <map>
#include <set>
#ifndef _MSC_VER
#include <getopt.h>
#else
#include "getopt.h"
#endif

#include "alignment_io.h"

Expand All @@ -16,22 +20,27 @@ struct option options[] = {
{"input_1", required_argument, 0, 'i'},
{"input_2", required_argument, 0, 'j'},
{"command", required_argument, 0, 'c'},
{"outfile", required_argument, 0, 'o'},
{0,0,0,0}
};

// Command-line state filled in by InitCommandLine().
// Argument of -i / --input_1.
string input_1;
// Argument of -j / --input_2.
string input_2;
// Argument of -c / --command.
string command;
// File stream opened on the -o / --outfile argument, when one is given.
ofstream outfileStream;
// Destination for serialized alignments: stdout by default, redirected to
// outfileStream when -o is supplied.
ostream* outstream = &cout;


bool InitCommandLine(int argc, char** argv) {
while (1) {
int oi;
int c = getopt_long(argc, argv, "i:j:c:", options, &oi);
int c = getopt_long(argc, argv, "i:j:c:o:", options, &oi);
if (c == -1) break;
switch(c) {
case 'i': input_1 = optarg; break;
case 'j': input_2 = optarg; break;
case 'c': command = optarg; break;
case 'o': outfileStream.open(optarg); outstream = &outfileStream; break;
default: return false;
}
}
Expand Down Expand Up @@ -308,7 +317,7 @@ int main(int argc, char **argv) {
AddCommand<GDFACommand>();
AddCommand<FMeasureCommand>();
if (!InitCommandLine(argc, argv)) {
cerr << "Usage: " << argv[0] << " -c COMMAND -i FILE1.AL [-j FILE2.AL]\n";
cerr << "Usage: " << argv[0] << " -c COMMAND -i FILE1.AL [-j FILE2.AL] [-o OUTPATH]\n";
cerr << "Valid options for COMMAND:";
for (auto it : commands)
cerr << ' ' << it.first;
Expand Down Expand Up @@ -368,7 +377,7 @@ int main(int argc, char **argv) {
}

if (cmd.Result() == 1) {
AlignmentIO::SerializePharaohFormat(*out, &cout);
AlignmentIO::SerializePharaohFormat(*out, outstream);
}
}
if (cmd.Result() == 2)
Expand Down
4 changes: 2 additions & 2 deletions src/corpus.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ class Dict {
return (x == ' ' || x == '\t');
}

inline void ConvertWhitespaceDelimitedLine(const std::string& line, std::vector<unsigned>* out) {
inline void ConvertWhitespaceDelimitedLine(const std::string& line, std::vector<unsigned>* out, bool frozen=false) {
size_t cur = 0;
size_t last = 0;
int state = 0;
out->clear();
while(cur < line.size()) {
if (is_ws(line[cur++])) {
if (state == 0) continue;
out->push_back(Convert(line.substr(last, cur - last - 1)));
out->push_back(Convert(line.substr(last, cur - last - 1), frozen));
state = 0;
} else {
if (state == 1) continue;
Expand Down
Loading