Skip to content

Commit

Permalink
Support UTF-8 characters in feature name again
Browse files Browse the repository at this point in the history
This commit reverts 0d59859.
Also see:
- microsoft#2226
- microsoft#2478
- microsoft#2229

I reproduced the issue and as @kidotaka gave us a great survey in microsoft#2226,
I conclude that the cause is not UTF-8 but "an empty string (character)".
Therefore, I revert "throw error when meet non ascii (microsoft#2229)", whose commit hash
is 0d59859, and add support for feature names in UTF-8 again.
  • Loading branch information
henry0312 committed Apr 6, 2020
1 parent 6d9e7f1 commit 9114d4c
Show file tree
Hide file tree
Showing 4 changed files with 1 addition and 18 deletions.
5 changes: 0 additions & 5 deletions include/LightGBM/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <LightGBM/config.h>
#include <LightGBM/feature_group.h>
#include <LightGBM/meta.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h>
Expand Down Expand Up @@ -633,10 +632,6 @@ class Dataset {
// replace ' ' in feature_names with '_'
bool spaceInFeatureName = false;
for (auto& feature_name : feature_names_) {
// check ascii
if (!Common::CheckASCII(feature_name)) {
Log::Fatal("Do not support non-ASCII characters in feature name.");
}
// check json
if (!Common::CheckAllowedJSON(feature_name)) {
Log::Fatal("Do not support special JSON characters in feature name.");
Expand Down
9 changes: 0 additions & 9 deletions include/LightGBM/utils/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -921,15 +921,6 @@ static T SafeLog(T x) {
}
}

// Reports whether every byte of `s` is at most 127.
// An empty string has no offending byte and therefore yields true.
inline bool CheckASCII(const std::string& s) {
  for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
    // Inspect the byte as unsigned so the test does not depend on the sign of char.
    const unsigned char byte = static_cast<unsigned char>(s[pos]);
    // A set high bit means the value is 128 or above.
    if ((byte & 0x80u) != 0) {
      return false;
    }
  }
  return true;
}

inline bool CheckAllowedJSON(const std::string& s) {
unsigned char char_code;
for (auto c : s) {
Expand Down
3 changes: 0 additions & 3 deletions src/io/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ void Config::KV2Map(std::unordered_map<std::string, std::string>* params, const
if (tmp_strs.size() == 2) {
value = Common::RemoveQuotationSymbol(Common::Trim(tmp_strs[1]));
}
if (!Common::CheckASCII(key) || !Common::CheckASCII(value)) {
Log::Fatal("Do not support non-ASCII characters in config.");
}
if (key.size() > 0) {
auto value_search = params->find(key);
if (value_search == params->end()) { // not set
Expand Down
2 changes: 1 addition & 1 deletion tests/c_api_test/test_.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def c_array(ctype, values):


def c_str(string):
return ctypes.c_char_p(string.encode('ascii'))
return ctypes.c_char_p(string.encode('utf8'))


def load_from_file(filename, reference):
Expand Down

0 comments on commit 9114d4c

Please sign in to comment.