Skip to content

Commit

Permalink
Merge pull request #59 from outbrain/rank_joined_featured_from_modelspec
Browse files (browse the repository at this point in the history)
compute ranking for joined features in modelspec
  • Loading branch information
bmramor authored Jan 16, 2024
2 parents 161da77 + 9577a61 commit f29ff14
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 8 deletions.
2 changes: 1 addition & 1 deletion outrank/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def main():
parser.add_argument(
'--reference_model_JSON',
type=str,
default='./ranking_outputs/reference_model.json',
default='',
help='Reference model JSON',
)

Expand Down
14 changes: 9 additions & 5 deletions outrank/core_ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,15 @@ def compute_combined_features(
join_string = ' AND_REL ' if is_3mr else ' AND '
interaction_order = 2 if is_3mr else args.interaction_order

full_combination_space = list(
itertools.combinations(all_columns, interaction_order),
)
if args.reference_model_JSON != '':
combined_features = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
full_combination_space = [combination.split(',') for combination in combined_features]
else:
full_combination_space = list(
itertools.combinations(all_columns, interaction_order),
)

if args.combination_number_upper_bound:
if args.combination_number_upper_bound and args.reference_model_JSON != '':
random.shuffle(full_combination_space)
full_combination_space = full_combination_space[
: args.combination_number_upper_bound
Expand Down Expand Up @@ -517,7 +521,7 @@ def compute_batch_ranking(
input_dataframe, logger, args, pbar,
)

if args.interaction_order > 1:
if args.interaction_order > 1 or args.reference_model_JSON:
pbar.set_description('Constructing new features')
input_dataframe = compute_combined_features(
input_dataframe, logger, args, pbar,
Expand Down
5 changes: 4 additions & 1 deletion outrank/core_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def parse_csv_raw(data_path) -> DatasetInformationStorage:
)


def extract_features_from_reference_JSON(json_path: str) -> set[Any]:
def extract_features_from_reference_JSON(json_path: str, combined_features_only = False) -> set[Any]:
"""Given a model's JSON, extract unique features"""

with open(json_path) as jp:
Expand All @@ -404,6 +404,9 @@ def extract_features_from_reference_JSON(json_path: str) -> set[Any]:
fields_space = content['desc'].get('fields', [])
joint_space = feature_space + fields_space

if combined_features_only:
return {feature for feature in feature_space if len(feature.split(','))>1}

for feature_tuple in joint_space:
for individual_feature in feature_tuple.split(','):
unique_features.add(individual_feature)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _read_description():
packages = [x for x in setuptools.find_packages() if x != 'test']
setuptools.setup(
name='outrank',
version='0.95.5',
version='0.95.6',
description='OutRank: Feature ranking for massive sparse data sets.',
long_description=_read_description(),
long_description_content_type='text/markdown',
Expand Down
1 change: 1 addition & 0 deletions tests/ranking_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class args:
combination_number_upper_bound: int = 1024
disable_tqdm: bool = False
mi_stratified_sampling_ratio: float = 1.0
reference_model_JSON: str = ''


class CompareStrategiesTest(unittest.TestCase):
Expand Down

0 comments on commit f29ff14

Please sign in to comment.