Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into add_result_cache2
Browse files Browse the repository at this point in the history
  • Loading branch information
small-turtle-1 committed Oct 21, 2024
2 parents 203befe + c61641c commit 2b9f63f
Show file tree
Hide file tree
Showing 31 changed files with 956 additions and 133 deletions.
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,22 @@ endif ()

MESSAGE(STATUS "C++ Compilation flags: " ${CMAKE_CXX_FLAGS})

# Classify the target CPU architecture once so later logic can branch on the
# ARM64 / X86_64 variables. The processor string is lower-cased before matching
# because its spelling depends on the platform (for example Windows reports
# "AMD64" / "ARM64", while Linux reports "x86_64" / "aarch64").
# Neither variable is set when the processor is not recognised.
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" system_processor_lower)
if(system_processor_lower MATCHES "^(arm64|aarch64)")
    set(ARM64 TRUE)
elseif(system_processor_lower MATCHES "^(x86_64|amd64)$")
    set(X86_64 TRUE)
endif()

# Define SIMDE_ENABLE_NATIVE_ALIASES for every target in this directory.
add_definitions(-DSIMDE_ENABLE_NATIVE_ALIASES)

# Extra code-generation flag for Clang 18 and newer. -mevex512 is an x86-only
# option, so it is added only when X86_64 was detected; any other architecture
# gets -march=native instead. The flag must not be added outside this
# architecture check, otherwise non-x86 builds receive an option they reject.
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "18.0")
    if(X86_64)
        add_definitions(-mevex512)
    else()
        add_definitions(-march=native)
    endif()
endif()

execute_process(
Expand Down
31 changes: 31 additions & 0 deletions example/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
{"c1": 'test@gmail.com', "c2": 'email'}, {"c1": 'test@hotmail.com', "c2": 'email'},
{"c1": ' abc', "c2": 'abc'}, {"c1": 'abc ', "c2": 'abc'}, {"c1": ' abc ', "c2": 'abc'}])

# varchar functions

#function char_length
res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 1").to_df()
print(res)
Expand Down Expand Up @@ -74,6 +76,35 @@
res = table_obj.output(["*", "char_position(c1, 'bc')"]).filter("char_position(c1, c1) <> 0").to_df()
print(res)

# math functions
# Recreate the "function_example" table with an integer column (c1) and a
# double column (c2), then fill it with eight sample rows.
db_obj.drop_table("function_example", ConflictType.Ignore)
db_obj.create_table("function_example",
{"c1": {"type": "integer"},
"c2": {"type": "double"}}, ConflictType.Error)
table_obj = db_obj.get_table("function_example")
table_obj.insert(
[{"c1": 1, "c2": 2.4}, {"c1": 3, "c2": 4.5}, {"c1": 5, "c2": 6.6}, {"c1": 7, "c2": 8},
{"c1": 9, "c2": 10}, {"c1": 11, "c2": 12}, {"c1": 13, "c2": 14}, {"c1": 15, "c2": 16},])

# function sqrt: project every column plus sqrt() of both columns
res = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).to_df()
print(res)

# same projection, restricted to rows where sqrt(c1) equals 3 (c1 = 9 in the rows above)
res = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).filter("sqrt(c1) = 3").to_df()
print(res)

# function round: applied to both the integer and the double column
res = table_obj.output(["*", "round(c1)", "round(c2)"]).to_df()
print(res)

# function ceil: applied to both the integer and the double column
res = table_obj.output(["*", "ceil(c1)", "ceil(c2)"]).to_df()
print(res)

# function floor: applied to both the integer and the double column
res = table_obj.output(["*", "floor(c1)", "floor(c2)"]).to_df()
print(res)

# Clean up: drop the example table, then close the connection.
res = db_obj.drop_table("function_example")

infinity_obj.disconnect()
56 changes: 48 additions & 8 deletions example/http/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ curl --request POST \
{
"name": "tensor",
"type": "tensor,4,float"
},
{
"name": "decimal",
"type": "double"
}
]
} '
Expand All @@ -62,63 +66,71 @@ curl --request POST \
"vec": [1.0, 1.2, 0.8, 0.9],
"sparse_column": {"10":1.1, "20":2.2, "30": 3.3},
"year": 2024,
"tensor": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]]
"tensor": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]],
"decimal": 1.4
},
{
"num": 2,
"body": "12345",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"40":4.4, "50":5.5, "60": 6.6},
"year": 2023,
"tensor": [[4.0, 0.0, 4.3, 4.5], [4.0, 4.2, 4.4, 5.0]]
"tensor": [[4.0, 0.0, 4.3, 4.5], [4.0, 4.2, 4.4, 5.0]],
"decimal": 1.5
},
{
"num": 3,
"body": "123456",
"vec": [4.0, 4.2, 4.3, 4.2],
"sparse_column": {"70":7.7, "80":8.8, "90": 9.9},
"year": 2019,
"tensor": [[0.9, 0.1, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]]
"tensor": [[0.9, 0.1, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]],
"decimal": -1.4
},
{
"num": 4,
"body": "123456789",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]],
"decimal": -1.5
},
{
"num": 5,
"body": "test@gmail.com",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]],
"decimal": 1
},
{
"num": 6,
"body": "test@hotmailcom",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]],
"decimal": -1
},
{
"num": 7,
"body": "this is a sentence including a mail address, test@hotmail.com",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]],
"decimal": 0.4
},
{
"num": 8,
"body": " this is an example for trim ",
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]],
"decimal": 0.5
}
] '

Expand Down Expand Up @@ -316,6 +328,34 @@ curl --request GET \
"filter": "char_position(body, '123') = 1"
} '

# show rows of 'tbl1' with sqrt(num)
# Sends a GET to the table's /docs endpoint with a JSON body whose "output"
# list selects the 'num' column and the expression sqrt(num).
# NOTE: in the echo below the inner single quotes around tbl1 end and reopen
# the quoted string, so the printed banner shows tbl1 without quotes.
echo -e '\n\n-- show rows of 'tbl1' with sqrt(num)'
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"num", "sqrt(num)"
]
} '

# show rows of 'tbl1' with decimal, round(decimal), ceil(decimal), floor(decimal)
# Same request shape as above; the "output" list selects the 'decimal' column
# together with round(), ceil() and floor() applied to it.
echo -e '\n\n-- show rows of 'tbl1' with decimal, round(decimal), ceil(decimal), floor(decimal)'
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"decimal", "round(decimal)", "ceil(decimal)", "floor(decimal)"
]
} '


# drop tbl1
echo -e '\n\n-- drop tbl1'
Expand Down
34 changes: 24 additions & 10 deletions python/infinity_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def to_df(self):

for res in self.output_res:
for k in res:
print(res[k])
#print(res[k])
if k not in df_dict:
df_dict[k] = ()
tup = df_dict[k]
Expand All @@ -686,27 +686,41 @@ def to_df(self):
new_tup = tup + (res[k],)
df_dict[k] = new_tup
# print(self.output_res)
# print(df_dict)
print(df_dict)

df_type = {}
for k in df_dict:
if k in col_types: # might be object
df_type[k] = type_to_dtype(col_types[k])
if k in ["DISTANCE", "SCORE", "SIMILARITY"]:
df_type[k] = dtype('float32')
# "(c1 + c2)"
k1 = k.replace("(", "")
k1 = k1.replace(")", "")
cols = k1.split("+") + k1.split("-") # ["c1 ", " c2", "c1 + c2"]
# print(cols)
# haven't considered data type priority
# "(c1 + c2)", "sqrt(c1), round(c1)"
k1 = k.replace("(", " ")
k1 = k1.replace(")", " ")
k1 = k1.replace("+", " ")
k1 = k1.replace("-", " ")
cols = k1.split(" ")
#print(cols)

function_name = ""
for col in cols:
#print(function_name)
if col.strip() in col_types:
df_type[k] = type_to_dtype(col_types[col.strip()])
if col.strip().isdigit():
df_type[k] = function_return_type(function_name, df_type[k])
elif col.strip().isdigit() and df_type[k] != dtype('float64'):
df_type[k] = dtype('int32')
if is_float(col.strip()):
df_type[k] = function_return_type(function_name, df_type[k])
elif is_float(col.strip()):
df_type[k] = dtype('float64')
df_type[k] = function_return_type(function_name, df_type[k])
else:
function_name = col.strip().lower()
if (function_name in functions):
df_type[k] = function_return_type(function_name, None)
if (function_name in bool_functions):
df_type[k] = dtype('bool')
break
return pd.DataFrame(df_dict).astype(df_type)

def to_arrow(self):
Expand Down
21 changes: 21 additions & 0 deletions python/test_pysdk/common/common_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@
"double", "varchar", "boolean"
]

# Names treated as functions/operators in output expressions: the math
# functions (sqrt, round, ceil, floor) plus the boolean-valued names that are
# also listed in bool_functions.
functions = [
"sqrt", "round", "ceil", "floor", "filter_text", "filter_fulltext", "or", "and", "not"
]

# Subset of `functions` whose result is boolean; function_return_type maps
# each of these names to dtype('bool').
bool_functions = [
"filter_text", "filter_fulltext", "or", "and", "not"
]

def function_return_type(function_name, param_type):
    """Return the numpy dtype that ``function_name`` yields for ``param_type``.

    Args:
        function_name: Lower-case function or operator name, e.g. ``"sqrt"``.
        param_type: numpy dtype of the argument, or ``None`` when it is not
            known yet.

    Returns:
        * ``dtype('float64')`` for ``sqrt``.
        * For ``round`` / ``ceil`` / ``floor``: ``param_type`` itself when it is
          one of int8, int16, int32 or int64, otherwise ``dtype('float64')``
          (this includes ``param_type`` being ``None``).
        * ``dtype('bool')`` for ``filter_text``, ``filter_fulltext``, ``or``,
          ``and`` and ``not``.
        * ``param_type`` unchanged for any other name.
    """
    if function_name == "sqrt":
        return dtype('float64')

    if function_name in ("round", "ceil", "floor"):
        # Rounding keeps a signed integer type as-is; everything else is
        # reported as float64.
        signed_int_types = (dtype('int8'), dtype('int16'), dtype('int32'), dtype('int64'))
        if any(param_type == int_type for int_type in signed_int_types):
            return param_type
        return dtype('float64')

    if function_name in ("filter_text", "filter_fulltext", "or", "and", "not"):
        return dtype('bool')

    # Unknown name: leave the type untouched.
    return param_type

unsupport_output = ["_similarity", "_row_id", "_score", "_distance"]

type_transfrom = {
Expand Down
52 changes: 52 additions & 0 deletions python/test_pysdk/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,4 +943,56 @@ def test_select_position(self, suffix):
.astype({'c1': dtype('O'), 'c2': dtype('O')}))

res = db_obj.drop_table("test_select_position"+suffix)
assert res.error_code == ErrorCode.OK

def test_select_sqrt(self, suffix):
    """Exercise sqrt() both as an output expression and inside a filter.

    The table has an integer column c1 and a double column c2. The projection
    result is only printed; the filtered result is compared with the single
    row whose c1 is 4.
    """
    database = self.infinity_obj.get_database("default_db")
    table_name = "test_select_sqrt"+suffix

    # Start from a clean table: ignore a missing one on drop, fail on a clash on create.
    database.drop_table(table_name, ConflictType.Ignore)
    column_defs = {"c1": {"type": "integer"},
                   "c2": {"type": "double"}}
    database.create_table(table_name, column_defs, ConflictType.Error)
    table = database.get_table(table_name)

    rows = [{"c1": '1', "c2": '2'}, {"c1": '4', "c2": '5'}, {"c1": '9', "c2": '10'}, {"c1": '16', "c2": '17'}]
    table.insert(rows)

    projected = table.output(["*", "sqrt(c1)", "sqrt(c2)"]).to_df()
    print(projected)

    filtered = table.output(["*"]).filter("sqrt(c1) = 2").to_df()
    expected = pd.DataFrame({'c1': (4,),
                             'c2': (5,)})
    expected = expected.astype({'c1': dtype('int32'), 'c2': dtype('double')})
    pd.testing.assert_frame_equal(filtered, expected)

    drop_result = database.drop_table(table_name)
    assert drop_result.error_code == ErrorCode.OK

def test_select_round(self, suffix):
    """Check round(), ceil() and floor() on a double column.

    c2 holds 2.4, -2.4, 2.5 and -2.5; each function is queried next to c1 and
    the resulting frame is compared with the expected values as doubles.
    """
    database = self.infinity_obj.get_database("default_db")
    table_name = "test_select_round"+suffix

    database.drop_table(table_name, ConflictType.Ignore)
    column_defs = {"c1": {"type": "integer"},
                   "c2": {"type": "double"}}
    database.create_table(table_name, column_defs, ConflictType.Error)
    table = database.get_table(table_name)

    rows = [{"c1": '1', "c2": '2.4'}, {"c1": '4', "c2": '-2.4'}, {"c1": '9', "c2": '2.5'}, {"c1": '16', "c2": '-2.5'}]
    table.insert(rows)

    # (output expression, expected values) checked in this order.
    cases = (
        ("round(c2)", (2, -2, 3, -3)),
        ("ceil(c2)", (3, -2, 3, -2)),
        ("floor(c2)", (2, -3, 2, -3)),
    )
    for expression, expected_values in cases:
        frame = table.output(["c1", expression]).to_df()
        print(frame)
        expected = pd.DataFrame({'c1': (1, 4, 9, 16),
                                 expression: expected_values})
        expected = expected.astype({'c1': dtype('int32'), expression: dtype('double')})
        pd.testing.assert_frame_equal(frame, expected)

    drop_result = database.drop_table(table_name)
    assert drop_result.error_code == ErrorCode.OK
Loading

0 comments on commit 2b9f63f

Please sign in to comment.