Skip to content

Commit

Permalink
neg sampling
Browse files Browse the repository at this point in the history
  • Loading branch information
ziyoujiyi committed Oct 19, 2021
1 parent 5f4af11 commit db7b0ee
Show file tree
Hide file tree
Showing 9 changed files with 459 additions and 14 deletions.
4 changes: 4 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ if(WITH_PSLIB)
add_definitions(-DPADDLE_WITH_PSLIB)
endif()

# When WITH_SAMPLING is enabled, add the NEG_INS_SAMPLING compile definition.
# The C++ sources wrap their sampling-specific code in `#ifdef NEG_INS_SAMPLING`.
if(WITH_SAMPLING)
add_definitions(-DNEG_INS_SAMPLING)
endif()

if(WITH_GLOO)
add_definitions(-DPADDLE_WITH_GLOO)
endif()
Expand Down
59 changes: 57 additions & 2 deletions paddle/fluid/framework/data_feed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -243,27 +243,37 @@ InMemoryDataFeed<T>::InMemoryDataFeed() {

// Marks the feed as started. On Linux builds it first verifies the file list
// has been set and, if the output channel is empty while the input channel
// still holds records, moves the input channel's contents to the output
// channel. Always returns true.
template <typename T>
bool InMemoryDataFeed<T>::Start() {
  VLOG(3) << "entering InMemoryDataFeed<T>::Start()";
#ifdef _LINUX
  this->CheckSetFileList();
  // Only refill the output channel when it has nothing left to consume; the
  // input channel size is queried only in that case (short-circuit).
  const bool output_is_empty = (output_channel_->Size() == 0);
  if (output_is_empty && input_channel_->Size() != 0) {
#ifdef NEG_INS_SAMPLING
    VLOG(3) << "transfer data from input_channel_ to output_channel_";
#endif
    std::vector<T> pending;
    input_channel_->Read(pending);
    // Hand the buffer over without copying the records again.
    output_channel_->Write(std::move(pending));
  }
#endif
  this->finish_start_ = true;
  VLOG(3) << "exit InMemoryDataFeed<T>::Start()";
  return true;
}

template <typename T>
int InMemoryDataFeed<T>::Next() {
#ifdef NEG_INS_SAMPLING
VLOG(3) << "entering InMemoryDataFeed<T>::Next()";
#endif
#ifdef _LINUX
this->CheckStart();
CHECK(output_channel_ != nullptr);
CHECK(consume_channel_ != nullptr);
#ifdef NEG_INS_SAMPLING
VLOG(3) << "output_channel_ size=" << output_channel_->Size()
<< ", consume_channel_ size=" << consume_channel_->Size()
<< ", thread_id=" << thread_id_;
#endif
int index = 0;
T instance;
std::vector<T> ins_vec;
Expand Down Expand Up @@ -397,6 +407,7 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
<< ", cost time=" << timeline.ElapsedSec()
<< " seconds, thread_id=" << thread_id_;
}
VLOG(3) << "datafeed input_channel_ size: " << input_channel_->Size();
VLOG(3) << "LoadIntoMemory() end, thread_id=" << thread_id_;
#endif
}
Expand Down Expand Up @@ -629,7 +640,6 @@ bool MultiSlotDataFeed::ParseOneInstanceFromPipe(

const char* str = reader.get();
std::string line = std::string(str);
// VLOG(3) << line;
char* endptr = const_cast<char*>(str);
int pos = 0;
for (size_t i = 0; i < use_slots_index_.size(); ++i) {
Expand Down Expand Up @@ -838,6 +848,12 @@ void MultiSlotInMemoryDataFeed::Init(
use_slots_shape_.push_back(local_shape);
}
}
#ifdef NEG_INS_SAMPLING
uid_slot_ = multi_slot_desc.uid_slot();
label_slot_ = multi_slot_desc.label_slot();
VLOG(3) << "uid_slot name is: " << uid_slot_;
VLOG(3) << "label_slot name is: " << label_slot_;
#endif
feed_vec_.resize(use_slots_.size());
pipe_command_ = data_feed_desc.pipe_command();
finish_init_ = true;
Expand All @@ -859,14 +875,19 @@ void MultiSlotInMemoryDataFeed::GetMsgFromLogKey(const std::string& log_key,

bool MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe(Record* instance) {
#ifdef _LINUX
#ifdef NEG_INS_SAMPLING
VLOG(3) << "entering MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe";
#endif
thread_local string::LineFileReader reader;

if (!reader.getline(&*(fp_.get()))) {
return false;
} else {
const char* str = reader.get();
std::string line = std::string(str);
// VLOG(3) << line;
#ifdef NEG_INS_SAMPLING
VLOG(3) << "MultiSlotInMemoryDataFeed - instance from pip: " << line;
#endif
char* endptr = const_cast<char*>(str);
int pos = 0;
if (parse_ins_id_) {
Expand Down Expand Up @@ -913,6 +934,9 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe(Record* instance) {
instance->rank = rank;
pos += len + 1;
}
#ifdef NEG_INS_SAMPLING
VLOG(3) << "use_slots_index_.size() is: " << use_slots_index_.size();
#endif
for (size_t i = 0; i < use_slots_index_.size(); ++i) {
int idx = use_slots_index_[i];
int num = strtol(&str[pos], &endptr, 10);
Expand All @@ -929,6 +953,37 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe(Record* instance) {
"\nWe detect the feasign number of this slot is %d, "
"which is illegal.",
str, i, num));

#ifdef NEG_INS_SAMPLING
VLOG(3) << "num: " << num;
VLOG(3) << "curr slot name is: " << all_slots_[i];
if (all_slots_[i] == uid_slot_) { // std::string
/*
PADDLE_ENFORCE(num == 1 && all_slots_type_[i][0] == 'u',
"The uid has to be uint64 and single.\n"
"please check this error line: %s",
str);
*/
// char* uidptr = endptr;
// uint64_t feasign = (uint64_t)strtoull(uidptr, &uidptr, 10);
// instance->uid_ = feasign;
}
// instance->uid_ = 0; // for test
VLOG(3) << "uid_slot_: " << uid_slot_;
VLOG(3) << "instance->uid_: " << instance->uid_;
if (all_slots_[i] == label_slot_) {
PADDLE_ENFORCE(num == 1 && all_slots_type_[i][0] == 'u',
"The label has to be uint64 and single.\n"
"please check this error line: %s",
str);
char* labelptr = endptr;
uint64_t feasign = (uint64_t)strtoull(labelptr, &labelptr, 10);
instance->label_ = feasign;
VLOG(3) << "label_slot_: " << label_slot_;
}
VLOG(3) << "instance->label_: " << instance->label_;
#endif

if (idx != -1) {
if (all_slots_type_[i][0] == 'f') { // float
for (int j = 0; j < num; ++j) {
Expand Down
21 changes: 20 additions & 1 deletion paddle/fluid/framework/data_feed.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ struct Record {
uint64_t search_id;
uint32_t rank;
uint32_t cmatch;

#ifdef NEG_INS_SAMPLING
uint64_t uid_;
uint64_t label_;
// std::vector<std::string> auc_tags_;
#endif
};

struct PvInstanceObject {
Expand Down Expand Up @@ -147,6 +153,7 @@ class DataFeed {

// This function does nothing by default
virtual void SetInputChannel(void* channel) {}

// This function does nothing by default
virtual void SetOutputChannel(void* channel) {}
// This function does nothing by default
Expand Down Expand Up @@ -179,6 +186,10 @@ class DataFeed {
place_ = place;
}
virtual const paddle::platform::Place& GetPlace() const { return place_; }
#ifdef NEG_INS_SAMPLING
std::string uid_slot_;
std::string label_slot_;
#endif

protected:
// The following three functions are used to check if it is executed in this
Expand Down Expand Up @@ -471,7 +482,7 @@ paddle::framework::Archive<AR>& operator>>(paddle::framework::Archive<AR>& ar,
for (size_t& x : offset) {
uint64_t t;
ar >> t;
x = (size_t)t;
x = static_cast<size_t>(t);
}
#endif
ar >> ins.MutableFloatData();
Expand Down Expand Up @@ -552,6 +563,10 @@ paddle::framework::Archive<AR>& operator<<(paddle::framework::Archive<AR>& ar,
ar << r.uint64_feasigns_;
ar << r.float_feasigns_;
ar << r.ins_id_;
#ifdef NEG_INS_SAMPLING
ar << r.uid_;
ar << r.label_;
#endif
return ar;
}

Expand All @@ -561,6 +576,10 @@ paddle::framework::Archive<AR>& operator>>(paddle::framework::Archive<AR>& ar,
ar >> r.uint64_feasigns_;
ar >> r.float_feasigns_;
ar >> r.ins_id_;
#ifdef NEG_INS_SAMPLING
ar >> r.uid_;
ar >> r.label_;
#endif
return ar;
}

Expand Down
8 changes: 7 additions & 1 deletion paddle/fluid/framework/data_feed.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ message Slot {
repeated int32 shape = 5; // we can define N-D Tensor
}

message MultiSlotDesc { repeated Slot slots = 1; }
// Describes the slots of a multi-slot data feed.
message MultiSlotDesc {
repeated Slot slots = 1;
// Slot names read by MultiSlotInMemoryDataFeed::Init when the C++ code is
// built with NEG_INS_SAMPLING: the slot carrying the uid and the slot
// carrying the label.
optional string uid_slot = 2;
optional string label_slot = 3;
// End of NEG_INS_SAMPLING fields.
}

message DataFeedDesc {
optional string name = 1;
Expand Down
Loading

1 comment on commit db7b0ee

@paddle-bot-old
Copy link

@paddle-bot-old paddle-bot-old bot commented on db7b0ee Oct 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #36564 Commit ID: db7b0ee contains failed CI.

🔹 Failed: PR-CI-Mac-18

Unknown Failed
2021-10-20 01:26:32 [INFO] Running on (paddle-mac-arm-2) with agent(v1.3.0) in workspace /Users/paddle/xly/workspace/4fd3485e-b49d-41cd-b877-dada3ed18a63
2021-10-20 01:26:32 + rm -rf /home/Paddle
2021-10-20 01:26:32 + cd /home
2021-10-20 01:26:32 + git clone -b release/1.8 https://github.com/PaddlePaddle/Paddle.git
2021-10-20 01:26:32 fatal: could not create work tree dir 'Paddle': Operation not supported

🔹 Failed: PR-CI-Mac-Python3-18

Unknown Failed
2021-10-20 01:28:11 + true
2021-10-20 01:28:11 + rm -rf python/dist
2021-10-20 01:28:11 + echo gym
2021-10-20 01:28:11 + PYTHON_FLAGS=
2021-10-20 01:28:11 ++ uname -s
2021-10-20 01:28:11 + SYSTEM=Darwin
2021-10-20 01:28:11 + '[' Darwin == Darwin ']'
2021-10-20 01:28:11 + echo 'Using python abi: cp35-cp35m'
2021-10-20 01:28:11 Using python abi: cp35-cp35m
2021-10-20 01:28:11 + [[ cp35-cp35m == \c\p\2\7-\c\p\2\7\m ]]
2021-10-20 01:28:11 + [[ cp35-cp35m == '' ]]
2021-10-20 01:28:11 + '[' cp35-cp35m == cp35-cp35m ']'
2021-10-20 01:28:11 + '[' -d /Library/Frameworks/Python.framework/Versions/3.5 ']'
2021-10-20 01:28:11 + exit 1
2021-10-20 01:28:11 + EXCODE=1
2021-10-20 01:28:11 + echo 'EXCODE: 1'
2021-10-20 01:28:11 EXCODE: 1
2021-10-20 01:28:11 + '[' 1 -eq 0 ']'
2021-10-20 01:28:11 + exit 1

🔹 Failed: PR-CI-Coverage-18

Unknown Failed
2021-10-20 01:30:31   Using cached PyJWT-1.7.1-py2.py3-none-any.whl (18 kB)
2021-10-20 01:30:31 Collecting deprecated
2021-10-20 01:30:31 Using cached Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
2021-10-20 01:30:31 Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/python2.7.15/lib/python2.7/site-packages (from deprecated->PyGithub->-r /paddle/python/requirements.txt (line 25)) (1.12.1)
2021-10-20 01:30:31 Installing collected packages: gast, pyjwt, deprecated, PyGithub, pycrypto
2021-10-20 01:30:31 Attempting uninstall: gast
2021-10-20 01:30:31 Found existing installation: gast 0.4.0
2021-10-20 01:30:31 Uninstalling gast-0.4.0:
2021-10-20 01:30:32 Successfully uninstalled gast-0.4.0
2021-10-20 01:30:33 Successfully installed PyGithub-1.45 deprecated-1.2.13 gast-0.3.3 pycrypto-2.6.1 pyjwt-1.7.1
2021-10-20 01:30:33 + set -e
2021-10-20 01:30:33 + wget https://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.gz
2021-10-20 01:30:33 --2021-10-19 17:30:33-- https://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.gz
2021-10-20 01:30:33 Connecting to 172.19.57.45:3128... connected.
2021-10-20 01:30:34 ERROR: cannot verify nixos.org's certificate, issued by 'CN=R3,O=Let's Encrypt,C=US':
2021-10-20 01:30:34 Issued certificate has expired.
2021-10-20 01:30:34 To connect to nixos.org insecurely, use `--no-check-certificate'.
2021-10-20 01:30:34 {build code state=5}
2021-10-20 01:30:34 the build(c18bdfa3108949d2a8172724ad8797de) state is BUILD_CODE_FAIL

🔹 Failed: PR-CI-Py35-18

Unknown Failed
2021-10-20 01:33:27 + '[' 8 -eq 0 ']'
2021-10-20 01:33:27 + set -e
2021-10-20 01:33:27 + file_push=ON
2021-10-20 01:33:27 + update_cached_package=ON
2021-10-20 01:33:27 + echo PyGithub
2021-10-20 01:33:27 + echo coverage
2021-10-20 01:33:27 + echo pycrypto
2021-10-20 01:33:27 + http_proxy=http://172.19.56.199:3128
2021-10-20 01:33:27 + https_proxy=http://172.19.56.199:3128
2021-10-20 01:33:27 + yum install -y graphviz
2021-10-20 01:33:28 Loaded plugins: fastestmirror, ovl
2021-10-20 01:33:28 Setting up Install Process
2021-10-20 01:33:28 Determining fastest mirrors
2021-10-20 01:33:28 Error: Cannot find a valid baseurl for repo: base
2021-10-20 01:33:28 YumRepo Error: All mirror URLs are not using ftp, http[s] or file.
2021-10-20 01:33:28 Eg. Invalid release/repo/arch combination/
2021-10-20 01:33:28 removing mirrorlist with no valid mirrors: /var/cache/yum/x86_64/6/base/mirrorlist.txt
2021-10-20 01:33:28 {build code state=1}
2021-10-20 01:33:29 the build(e1cdf65e073e401c8ba4df6a2be2fcc4) state is BUILD_CODE_FAIL

🔹 Failed: PR-CI-Inference-18

Unknown Failed
2021-10-20 01:45:51 [ 31%] Linking CXX static library libsgd_op.a
2021-10-20 01:45:51 [ 31%] Built target sgd_op
2021-10-20 01:45:55 Scanning dependencies of target dgc_momentum_op
2021-10-20 01:45:55 [ 31%] Building CXX object paddle/fluid/operators/optimizers/CMakeFiles/dgc_momentum_op.dir/dgc_momentum_op.cc.o
2021-10-20 01:45:55 [ 31%] Linking CXX static library libdgc_momentum_op.a
2021-10-20 01:45:55 [ 31%] Built target dgc_momentum_op
2021-10-20 01:46:00 Scanning dependencies of target softmax
2021-10-20 01:46:00 [ 31%] Building CXX object paddle/fluid/operators/math/CMakeFiles/softmax.dir/softmax.cc.o
2021-10-20 01:46:00 [ 31%] Linking CXX static library libsoftmax.a
2021-10-20 01:46:00 [ 31%] Built target softmax
2021-10-20 01:46:00 CMakeFiles/Makefile2:2638: recipe for target 'CMakeFiles/inference_lib_dist.dir/rule' failed
2021-10-20 01:46:00 make[1]: *** [CMakeFiles/inference_lib_dist.dir/rule] Error 2
2021-10-20 01:46:00 Makefile:177: recipe for target 'inference_lib_dist' failed
2021-10-20 01:46:00 make: *** [inference_lib_dist] Error 2
2021-10-20 01:46:00 + build_error=2
2021-10-20 01:46:00 + '[' 2 '!=' 0 ']'
2021-10-20 01:46:00 + exit 7
2021-10-20 01:46:00 {build code state=7}
2021-10-20 01:46:01 the build(25afa1afad28441f850a8eb3107ad51f) state is BUILD_CODE_FAIL

🔹 Failed: PR-CI-CPU-Py2-18

Unknown Failed
2021-10-20 01:59:01     from_path, content_type = download(link, temp_dir.path)
2021-10-20 01:59:01 File "/workspace/Paddle/build/.check_api_workspace/.DEV_env/lib/python2.7/site-packages/pip/_internal/network/download.py", line 163, in call
2021-10-20 01:59:01 for chunk in chunks:
2021-10-20 01:59:01 File "/workspace/Paddle/build/.check_api_workspace/.DEV_env/lib/python2.7/site-packages/pip/_internal/cli/progress_bars.py", line 168, in iter
2021-10-20 01:59:01 for x in it:
2021-10-20 01:59:01 File "/workspace/Paddle/build/.check_api_workspace/.DEV_env/lib/python2.7/site-packages/pip/_internal/network/utils.py", line 88, in response_chunks
2021-10-20 01:59:01 decode_content=False,
2021-10-20 01:59:01 File "/workspace/Paddle/build/.check_api_workspace/.DEV_env/lib/python2.7/site-packages/pip/_vendor/urllib3/response.py", line 576, in stream
2021-10-20 01:59:01 data = self.read(amt=amt, decode_content=decode_content)
2021-10-20 01:59:01 File "/workspace/Paddle/build/.check_api_workspace/.DEV_env/lib/python2.7/site-packages/pip/_vendor/urllib3/response.py", line 541, in read
2021-10-20 01:59:01 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
2021-10-20 01:59:01 File "/usr/local/python2.7.15/lib/python2.7/contextlib.py", line 35, in exit
2021-10-20 01:59:01 self.gen.throw(type, value, traceback)
2021-10-20 01:59:01 File "/workspace/Paddle/build/.check_api_workspace/.DEV_env/lib/python2.7/site-packages/pip/_vendor/urllib3/response.py", line 451, in _error_catcher
2021-10-20 01:59:01 raise ReadTimeoutError(self._pool, None, "Read timed out.")
2021-10-20 01:59:01 ReadTimeoutError: HTTPSConnectionPool(host='mirrors.aliyun.com', port=443): Read timed out.
2021-10-20 01:59:04 + :
2021-10-20 01:59:04 {build code state=2}
2021-10-20 01:59:04 the build(dfd313cb53074691924c8732fe7fb345) state is BUILD_CODE_FAIL

Please sign in to comment.