Skip to content

Commit 6cfef24

Browse files
author
Lukas Drapal
committed
added serving_test_gen.py
1 parent a4013f6 commit 6cfef24

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed

python/serving_test_gen.py

+85
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
"""Contains methods to generate a JSON file for Seldon API integration testing."""
2+
3+
import os
4+
import random
5+
from typing import List, Optional, Union
6+
7+
import numpy as np
8+
import pandas as pd
9+
10+
# Fallback [min, max] bounds reported for a numeric column whose observed range
# cannot be computed because its minimum is null (see _column_range).
RANGE_INTEGER_MIN = 0
RANGE_INTEGER_MAX = 1
RANGE_FLOAT_MIN = 0.0
RANGE_FLOAT_MAX = 1.0
14+
15+
16+
def _column_range(col: pd.Series) -> Union[List, float]:
    """
    Calculate the minimum and maximum of a numeric column.

    Missing values are ignored when computing the bounds. If no bound can be computed
    (the column is empty or holds only missing values), the module-level default range
    for the column's kind (float or integer) is returned instead.

    :param col: Column to inspect.
    :return: ``[min, max]`` for float and integer columns, NaN for any other dtype.
    """
    is_float = pd.api.types.is_float_dtype(col.dtype)
    if not is_float and not pd.api.types.is_integer_dtype(col.dtype):
        return np.nan

    # Series.min()/max() skip NaN, unlike the builtins, whose result depends on where
    # the NaN sits in the data (min([nan, 1, 2]) is nan) and which raise on empty input.
    lowest = col.min()
    if pd.isnull(lowest):  # Nothing to measure, which also means that the maximum is null
        if is_float:
            return [RANGE_FLOAT_MIN, RANGE_FLOAT_MAX]
        return [RANGE_INTEGER_MIN, RANGE_INTEGER_MAX]
    return [lowest, col.max()]
35+
36+
37+
def _column_values(column: pd.Series) -> Union[List, float]:
    """
    List the distinct values of a non-numeric column in a reproducible shuffled order.

    Float and integer columns are not enumerated; NaN is returned for them.

    :param column: Column to inspect.
    :return: All unique values of the column in a fixed pseudo-random order, or NaN for numeric columns.
    """
    dtype = column.dtype
    if pd.api.types.is_float_dtype(dtype) or pd.api.types.is_integer_dtype(dtype):
        return np.nan

    unique_values = column.unique().tolist()
    # A private generator with the fixed seed gives the same ordering as seeding the
    # module-level generator would, without resetting the caller's global random state.
    return random.Random(8888).sample(unique_values, len(unique_values))
50+
51+
52+
def create_seldon_api_testing_file(data: pd.DataFrame, target: str, output_path: str) -> bool:
    """
    Create a JSON file for Seldon API testing.

    Every column of ``data`` is described by one JSON object with the keys ``name``,
    ``dtype`` (``FLOAT`` or ``INTEGER``; omitted for non-numeric columns), ``ftype``
    (``continuous`` for float/integer columns, ``categorical`` otherwise) and whichever
    of ``range`` / ``values`` the helper functions provide for the column. The object of
    the ``target`` column is written under ``"targets"``, all others under ``"features"``.

    :param data: Pandas DataFrame used as a recipe for the json file.
    :param target: Name of the target column.
    :param output_path: Path of output file.
    :return: True if file correctly generated.
    """
    feature_records = []
    target_records = []
    for column_name in data.columns.values:
        column = data[column_name]
        if pd.api.types.is_float_dtype(column.dtype):
            dtype_label = 'FLOAT'
        elif pd.api.types.is_integer_dtype(column.dtype):
            dtype_label = 'INTEGER'
        else:
            # None instead of a NaN mixed into a string array: the latter turns into the
            # literal text "nan" and would be written out as a bogus dtype.
            dtype_label = None
        ftype_label = 'continuous' if dtype_label is not None else 'categorical'

        record = pd.Series(
            [column_name, dtype_label, ftype_label, _column_range(column), _column_values(column)],
            index=["name", "dtype", "ftype", "range", "values"],
            dtype=object,
        )
        # Keys whose value is missing (NaN/None) do not apply to this column and are left out
        record_json = record[record.notnull()].to_json()
        if column_name == target:
            target_records.append(record_json)
        else:
            feature_records.append(record_json)

    json_features = f'[{",".join(feature_records)}]'
    json_target = f'[{",".join(target_records)}]'
    json_combined = f'{{"features": {json_features}, "targets": {json_target}}}'

    with open(output_path, 'w') as output_file:
        output_file.write(json_combined)
    return os.path.exists(output_path)
85+

0 commit comments

Comments
 (0)