3 changes: 2 additions & 1 deletion .gitignore
@@ -167,4 +167,5 @@ LPs.zip
 reports.csv
 report.csv
 **/concepts_learners/
-data//results/*
+**/data/
+**/experiments/
41 changes: 40 additions & 1 deletion examples/concept_learning_cv_evaluation.py
@@ -10,6 +10,7 @@
 import os
 from typing import Union
 import pandas as pd
+from ontolearn.consyn.executor import ConSynExecutor
 from ontolearn.knowledge_base import KnowledgeBase
 from ontolearn.concept_learner import CELOE, EvoLearner, NCES, NCES2, ROCES, CLIP
 from ontolearn.refinement_operators import ExpressRefinement, ModifiedCELOERefinement
@@ -115,6 +116,16 @@ def dl_concept_learning(args):
                     max_num_of_concepts_tested=int(1e9), max_runtime=args.max_runtime,
                     path_of_embeddings=args.path_of_clip_embeddings,
                     pretrained_predictor_name=["LSTM", "GRU", "SetTransformer"], load_pretrained=True)
 
+    if not args.learner_types or 'consyn' in args.learner_types:
+        consyn_executor = ConSynExecutor(
+            kb_path=args.kb, lps_path=args.lps,
+            verbose=getattr(args, "verbose", False),
+            device='cpu',
+            num_k_predictions=getattr(args, "num_k_predictions", 50)
+        )
+
+        consyn = consyn_executor.trainer
+
     # dictionary to store the data
     data = dict()
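Note: the `getattr(args, ..., default)` calls above are what let this block run even when the CLI has not registered `--verbose` or `--num_k_predictions`. A minimal sketch of that fallback (flag names as in the diff, `Namespace` contents hypothetical):

```python
# argparse.Namespace raises AttributeError for attributes that were never
# registered as flags; getattr supplies a default instead.
from argparse import Namespace

args = Namespace(kb="family.owl", lps="lps.json")  # hypothetical: no --verbose parsed
verbose = getattr(args, "verbose", False)          # -> False rather than AttributeError
num_k = getattr(args, "num_k_predictions", 50)     # -> 50
print(verbose, num_k)                              # False 50
```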
@@ -372,6 +383,34 @@ def dl_concept_learning(args):
             print(f"CLIP Test Quality: {test_f1_clip:.3f}", end="\t")
             print(f"CLIP Runtime: {rt_clip:.3f}")
 
+        if not args.learner_types or 'consyn' in args.learner_types:
+            print("ConSyn starts..", end="\t")
+            start_time = time.time()
+            # set use_sample_ratio to None for full use of the train_lp
+            pred_consyn = consyn.fit(knowledge_base=kb, target_concept=str_target_concept,
+                                     target_concept_lp=train_lp, path=consyn_executor.config['FIT_PATH'],
+                                     num_predictions=consyn.num_k_predictions).best_hypotheses()
+            rt_consyn = time.time() - start_time
+            consyn.cshs.clear(paradigm='fit')
+            print("ConSyn ends..", end="\t")
+            # () Quality on the training data
+            train_f1_consyn = compute_f1_score(individuals=frozenset({i for i in kb.individuals(pred_consyn)}),
+                                               pos=train_lp.pos,
+                                               neg=train_lp.neg)
+            # () Quality on test data
+            test_f1_consyn = compute_f1_score(individuals=frozenset({i for i in kb.individuals(pred_consyn)}),
+                                              pos=test_lp.pos,
+                                              neg=test_lp.neg)
+
+            data.setdefault("Train-F1-ConSyn", []).append(train_f1_consyn)
+            data.setdefault("Test-F1-ConSyn", []).append(test_f1_consyn)
+            data.setdefault("RT-ConSyn", []).append(rt_consyn)
+            print(f"ConSyn Train Quality: {train_f1_consyn:.3f}", end="\t")
+            print(f"ConSyn Test Quality: {test_f1_consyn:.3f}", end="\t")
+            print(f"ConSyn Runtime: {rt_consyn:.3f}")
+            print()
+            print()
+
     df = pd.DataFrame.from_dict(data)
     df.to_csv(args.report, index=False)
     print(df)
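The bookkeeping above mirrors the other learners in this script: each learner appends one value per learning problem under its own key, and pandas then aligns those lists into columns. A self-contained sketch with hypothetical numbers:

```python
# Each metric key maps to a list with one entry per learning problem;
# setdefault creates the list on first use, append extends it afterwards.
import pandas as pd

data = {}
for train_f1, test_f1, runtime in [(0.91, 0.88, 1.2), (0.95, 0.90, 1.1)]:  # hypothetical results
    data.setdefault("Train-F1-ConSyn", []).append(train_f1)
    data.setdefault("Test-F1-ConSyn", []).append(test_f1)
    data.setdefault("RT-ConSyn", []).append(runtime)

df = pd.DataFrame.from_dict(data)  # one row per learning problem, one column per metric
print(df)
```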
@@ -386,7 +425,7 @@ def dl_concept_learning(args):
     parser.add_argument("--kb", type=str, required=True,
                         help="Knowledge base")
     parser.add_argument("--learner_types", type=str, nargs='*', default=None,
-                        choices=["celoe", "ocel", "evolearner", "drill", "nces", "tdl", "nces2", "roces", "clip"],
+                        choices=["celoe", "ocel", "evolearner", "drill", "nces", "tdl", "nces2", "roces", "clip", "consyn"],
                         help="List of available concept learning models")
     parser.add_argument("--path_drill_embeddings", type=str, default=None)
     parser.add_argument("--path_of_nces_embeddings", type=str, default=None)
2 changes: 1 addition & 1 deletion main.py
@@ -29,7 +29,7 @@
 def get_default_arguments(description=None):
     parser = ArgumentParser()
 
-    parser.add_argument("--model", type=str, default="celoe", choices=["celoe", "ocel", "evolearner", "nces", "tdl"],
+    parser.add_argument("--model", type=str, default="celoe", choices=["celoe", "ocel", "evolearner", "nces", "tdl", "consyn"],
                         help="Available concept learning models.")
 
     # Knowledge graph related arguments
21 changes: 14 additions & 7 deletions ontolearn/abstracts.py
@@ -78,13 +78,20 @@ def score_elp(self, instances: set, learning_problem: EncodedLearningProblem) ->
         if len(instances) == 0:
             return False, 0
         # @TODO: It must be moved to the top of the abstracts.py
-        from ontolearn.learning_problem import EncodedPosNegLPStandard
-        if isinstance(learning_problem, EncodedPosNegLPStandard):
-            tp = len(learning_problem.kb_pos.intersection(instances))
-            tn = len(learning_problem.kb_neg.difference(instances))
-
-            fp = len(learning_problem.kb_neg.intersection(instances))
-            fn = len(learning_problem.kb_pos.difference(instances))
+        from ontolearn.learning_problem import EncodedPosNegLPStandard, PosNegLPStandard
+        if isinstance(learning_problem, (EncodedPosNegLPStandard, PosNegLPStandard)):
+            if isinstance(learning_problem, EncodedPosNegLPStandard):
+                pos = learning_problem.kb_pos
+                neg = learning_problem.kb_neg
+            else:
+                pos = learning_problem.pos
+                neg = learning_problem.neg
+
+            tp = len(pos.intersection(instances))
+            tn = len(neg.difference(instances))
+            fp = len(neg.intersection(instances))
+            fn = len(pos.difference(instances))
+
             return self.score2(tp=tp, tn=tn, fp=fp, fn=fn)
         else:
             raise NotImplementedError(learning_problem)
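For clarity, the set arithmetic above computes a standard confusion matrix over individuals; a minimal worked example (names hypothetical, and with F1 standing in for whatever metric `score2` implements):

```python
pos = {"anna", "heinz"}                 # positive examples
neg = {"markus"}                        # negative examples
instances = {"anna", "markus"}          # hypothetical retrieval of the candidate concept

tp = len(pos.intersection(instances))   # positives covered:     1 (anna)
tn = len(neg.difference(instances))     # negatives not covered: 0
fp = len(neg.intersection(instances))   # negatives covered:     1 (markus)
fn = len(pos.difference(instances))     # positives missed:      1 (heinz)

f1 = 2 * tp / (2 * tp + fp + fn)
print(tp, tn, fp, fn, f1)               # 1 0 1 1 0.5
```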
31 changes: 31 additions & 0 deletions ontolearn/consyn/__init__.py
@@ -0,0 +1,31 @@
# -----------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2024 Ontolearn Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------

# from .architecture import ConSynGeneratorModel
# from .grammar import ConSynGrammarParser
# from .inference import ConSynInference
# from .reward import ConSynRewardFunction
# from .trainer import ConSynTrainer
# from .tokenizer import ConSynTokenizer
# from .utils import *
61 changes: 61 additions & 0 deletions ontolearn/consyn/configs.py
@@ -0,0 +1,61 @@
import os

import torch

BASE_DATA_DIR = 'data'
os.makedirs(BASE_DATA_DIR, exist_ok=True)

CONFIG = {
    'seed': 42,
    'd_model': 128,
    'nhead': 1,
    'num_layers': 8,
    'dim_feedforward': 512,
    'use_checkpointing': True,
    'pre_norm': True,
    'dropout': 0.1,
    'learning_rate': 1e-4,
    'num_epochs': 300,
    'batch_size': 1,
    # 'max_global_seq_len': 1500,
    'max_output_seq_len': 30,
    'log_interval': 5,
    'grad_norm_clip': 1.0,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',

    'KNOWLEDGE_BASE_PATH': os.path.join(BASE_DATA_DIR, "KGs/Family/family-benchmark_rich_background.owl"),
    'LEARNING_PROBLEM_PATH': os.path.join(BASE_DATA_DIR, "LPs/Family/lps.json"),
    'split_dataset': True,

    # --- Data Augmentation Configuration ---
    # "apply_task_label_logical_aug": False,  # Global flag to enable/disable logical aug for task labels
    # "task_label_neg_sample_ratio": 0.0,  # Probability of negating an original task label (0.0 to 1.0)
    # "task_label_feat_aug_sample_ratio": 0.2,
    # "individual_feat_aug_sample_ratio": 0.5,  # Prob. an individual's features undergo logical content augmentation (0.0 to 1.0)
    # "apply_indv_feat_rand_aug": False,  # Global flag to enable/disable individual feature randomization (shuffling)
    # "indv_feat_sample_ratio_for_shuffle": 0.5,  # Ratio of individuals whose features will be considered for shuffling for randomization
    # "indv_feat_shuffle_ratio": 0.5,  # Ratio of features within a selected individual to shuffle for randomization (0.0 to 1.0)
    'num_dataloader_workers': 0
}

owl_path = CONFIG['KNOWLEDGE_BASE_PATH']
base_folder_name = os.path.basename(os.path.dirname(owl_path))
experiment_dir = os.path.join("experiments", base_folder_name.lower())

# if CONFIG["apply_task_label_logical_aug"]:
# experiment_dir = os.path.join(experiment_dir, "augment")

os.makedirs(experiment_dir, exist_ok=True)

expr_data = experiment_dir+'/data'

CONFIG['EXPERIMENT_DIR'] = experiment_dir
CONFIG['GENERATED_DATA_PATH'] = expr_data+'/generated_raw_data.json'
CONFIG['TASK_LABEL_MAPPING_PATH'] = expr_data+'/task_label_mappings.json'

expr_fit_data = expr_data+'/fit'

CONFIG['FIT_PATH'] = {
    'GENERATED_DATA_PATH': expr_fit_data + '/generated_raw_data.json',
    'TASK_LABEL_MAPPING_PATH': expr_fit_data + '/task_label_mappings.json'
}
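To make the derived layout concrete, here is a trace of the path construction above for the default Family knowledge base (values follow directly from the `os.path` calls; POSIX separators shown):

```python
import os

owl_path = "data/KGs/Family/family-benchmark_rich_background.owl"  # CONFIG['KNOWLEDGE_BASE_PATH']
base_folder_name = os.path.basename(os.path.dirname(owl_path))     # "Family"
experiment_dir = os.path.join("experiments", base_folder_name.lower())

print(experiment_dir)                                        # experiments/family
print(experiment_dir + "/data/generated_raw_data.json")      # GENERATED_DATA_PATH
print(experiment_dir + "/data/fit/generated_raw_data.json")  # FIT_PATH['GENERATED_DATA_PATH']
```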
74 changes: 74 additions & 0 deletions ontolearn/consyn/executor.py
@@ -0,0 +1,74 @@
import logging
from typing import Optional
from torch.optim import AdamW

from ontolearn.consyn.configs import CONFIG
from ontolearn.consyn.intializer import Initializer
from ontolearn.consyn.trainer import ConSynTrainer
from ontolearn.consyn.model.model import ConSynGeneratorModel

logger = logging.getLogger(__name__)


class ConSynExecutor:
    def __init__(self, kb_path, lps_path, num_k_predictions: int = 30, device: Optional[str] = None, verbose: bool = False):
        CONFIG['KNOWLEDGE_BASE_PATH'] = kb_path
        CONFIG['LEARNING_PROBLEM_PATH'] = lps_path

        if device is not None:
            CONFIG['device'] = device

        self.num_k_predictions = num_k_predictions
        self.verbose = verbose

        self.config = CONFIG
        self.device = CONFIG['device']

        if self.verbose:
            print(f"Using device: {self.device}\n")

        # Initialize core components
        initializer = Initializer(config=self.config, mode="fit", verbose=self.verbose)
        components = initializer.get_components()

        self.tokenizer = components['tokenizer']
        self.grammar_parser = components['grammar_parser']
        self.reward = components['reward']
        self.heuristic = components['heuristic']

        vocab_size = self.tokenizer.vocab_size

        # Initialize model
        self.model = ConSynGeneratorModel(
            tokenizer=self.tokenizer,
            input_vocab_size=vocab_size,
            target_vocab_size=vocab_size,
            embed_dim=self.config['d_model'],
            num_encoder_layers=self.config['num_layers'],
            num_decoder_layers=self.config['num_layers'],
            num_heads=self.config['nhead'],
            ff_dim=self.config['dim_feedforward'],
            dropout_prob=self.config['dropout'],
            num_segments=4,
            use_checkpointing=self.config['use_checkpointing'],
            pre_norm=self.config['pre_norm']
        ).to(self.device)

        # Optimizer and trainer
        optimizer = AdamW(self.model.parameters(), lr=1e-5)

        self.trainer = ConSynTrainer(
            model=self.model,
            tokenizer=self.tokenizer,
            grammar_parser=self.grammar_parser,
            reward_function=self.reward,
            heuristic_function=self.heuristic,
            optimizer=optimizer,
            device=self.device,
            num_k_predictions=self.num_k_predictions,
            max_gen_length=self.config['max_output_seq_len'],
            expr_save_path=self.config['EXPERIMENT_DIR'],
            verbose=self.verbose
        )

        self.trainer.prepare_for_fit(verbose=self.verbose)
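End-to-end, this executor is the only entry point the evaluation script needs; a minimal usage sketch, mirroring `examples/concept_learning_cv_evaluation.py` (paths hypothetical, and `kb`/`train_lp` assumed to be the usual Ontolearn knowledge base and learning-problem objects):

```python
# Invoked from the CLI via the existing flags, e.g.:
#   python examples/concept_learning_cv_evaluation.py --kb <kb.owl> --lps <lps.json> --learner_types consyn
from ontolearn.consyn.executor import ConSynExecutor

executor = ConSynExecutor(
    kb_path="KGs/Family/family-benchmark_rich_background.owl",  # hypothetical path
    lps_path="LPs/Family/lps.json",                             # hypothetical path
    num_k_predictions=50,
    device="cpu",
    verbose=True,
)
trainer = executor.trainer  # a ConSynTrainer, already prepared via prepare_for_fit()

# As in the evaluation script ("Aunt" is a hypothetical target concept):
# prediction = trainer.fit(knowledge_base=kb, target_concept="Aunt",
#                          target_concept_lp=train_lp,
#                          path=executor.config['FIT_PATH'],
#                          num_predictions=trainer.num_k_predictions).best_hypotheses()
```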