3 changes: 2 additions & 1 deletion .gitignore
@@ -167,4 +167,5 @@ LPs.zip
 reports.csv
 report.csv
 **/concepts_learners/
-data//results/*
+**/data/
+**/experiments/
41 changes: 40 additions & 1 deletion examples/concept_learning_cv_evaluation.py
@@ -10,6 +10,7 @@
 import os
 from typing import Union
 import pandas as pd
+from ontolearn.consyn.executor import ConSynExecutor
 from ontolearn.knowledge_base import KnowledgeBase
 from ontolearn.concept_learner import CELOE, EvoLearner, NCES, NCES2, ROCES, CLIP
 from ontolearn.refinement_operators import ExpressRefinement, ModifiedCELOERefinement
@@ -115,6 +116,16 @@ def dl_concept_learning(args):
                     max_num_of_concepts_tested=int(1e9), max_runtime=args.max_runtime,
                     path_of_embeddings=args.path_of_clip_embeddings,
                     pretrained_predictor_name=["LSTM", "GRU", "SetTransformer"], load_pretrained=True)
 
+    if not args.learner_types or 'consyn' in args.learner_types:
+        consyn_executor = ConSynExecutor(
+            kb_path=args.kb, lps_path=args.lps,
+            verbose=getattr(args, "verbose", False),
+            device='cpu',
+            num_k_predictions=getattr(args, "num_k_predictions", 50)
+        )
+
+        consyn = consyn_executor.trainer
+
     # dictionary to store the data
     data = dict()
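Note: the `getattr(args, ..., default)` calls above are what let this block run even when the CLI has not registered `--verbose` or `--num_k_predictions`. A minimal sketch of that fallback (flag names as in the diff, `Namespace` contents hypothetical):

```python
# argparse.Namespace raises AttributeError for attributes that were never
# registered as flags; getattr supplies a default instead.
from argparse import Namespace

args = Namespace(kb="family.owl", lps="lps.json")  # hypothetical: no --verbose parsed
verbose = getattr(args, "verbose", False)          # -> False rather than AttributeError
num_k = getattr(args, "num_k_predictions", 50)     # -> 50
print(verbose, num_k)                              # False 50
```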
@@ -372,6 +383,34 @@ def dl_concept_learning(args):
             print(f"CLIP Test Quality: {test_f1_clip:.3f}", end="\t")
             print(f"CLIP Runtime: {rt_clip:.3f}")
 
+        if not args.learner_types or 'consyn' in args.learner_types:
+            print("ConSyn starts..", end="\t")
+            start_time = time.time()
+            # set use_sample_ratio to None for full use of the train_lp
+            pred_consyn = consyn.fit(knowledge_base=kb, target_concept=str_target_concept,
+                                     target_concept_lp=train_lp, path=consyn_executor.config['FIT_PATH'],
+                                     num_predictions=consyn.num_k_predictions).best_hypotheses()
+            rt_consyn = time.time() - start_time
+            consyn.cshs.clear(paradigm='fit')
+            print("ConSyn ends..", end="\t")
+            # () Quality on the training data
+            train_f1_consyn = compute_f1_score(individuals=frozenset({i for i in kb.individuals(pred_consyn)}),
+                                               pos=train_lp.pos,
+                                               neg=train_lp.neg)
+            # () Quality on test data
+            test_f1_consyn = compute_f1_score(individuals=frozenset({i for i in kb.individuals(pred_consyn)}),
+                                              pos=test_lp.pos,
+                                              neg=test_lp.neg)
+
+            data.setdefault("Train-F1-ConSyn", []).append(train_f1_consyn)
+            data.setdefault("Test-F1-ConSyn", []).append(test_f1_consyn)
+            data.setdefault("RT-ConSyn", []).append(rt_consyn)
+            print(f"ConSyn Train Quality: {train_f1_consyn:.3f}", end="\t")
+            print(f"ConSyn Test Quality: {test_f1_consyn:.3f}", end="\t")
+            print(f"ConSyn Runtime: {rt_consyn:.3f}")
+            print()
+            print()
+
     df = pd.DataFrame.from_dict(data)
     df.to_csv(args.report, index=False)
     print(df)
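The bookkeeping above mirrors the other learners in this script: each learner appends one value per learning problem under its own key, and pandas then aligns those lists into columns. A self-contained sketch with hypothetical numbers:

```python
# Each metric key maps to a list with one entry per learning problem;
# setdefault creates the list on first use, append extends it afterwards.
import pandas as pd

data = {}
for train_f1, test_f1, runtime in [(0.91, 0.88, 1.2), (0.95, 0.90, 1.1)]:  # hypothetical results
    data.setdefault("Train-F1-ConSyn", []).append(train_f1)
    data.setdefault("Test-F1-ConSyn", []).append(test_f1)
    data.setdefault("RT-ConSyn", []).append(runtime)

df = pd.DataFrame.from_dict(data)  # one row per learning problem, one column per metric
print(df)
```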
@@ -386,7 +425,7 @@ def dl_concept_learning(args):
     parser.add_argument("--kb", type=str, required=True,
                         help="Knowledge base")
     parser.add_argument("--learner_types", type=str, nargs='*', default=None,
-                        choices=["celoe", "ocel", "evolearner", "drill", "nces", "tdl", "nces2", "roces", "clip"],
+                        choices=["celoe", "ocel", "evolearner", "drill", "nces", "tdl", "nces2", "roces", "clip", "consyn"],
                         help="List of available concept learning models")
     parser.add_argument("--path_drill_embeddings", type=str, default=None)
     parser.add_argument("--path_of_nces_embeddings", type=str, default=None)
2 changes: 1 addition & 1 deletion main.py
@@ -29,7 +29,7 @@
 def get_default_arguments(description=None):
     parser = ArgumentParser()
 
-    parser.add_argument("--model", type=str, default="celoe", choices=["celoe", "ocel", "evolearner", "nces", "tdl"],
+    parser.add_argument("--model", type=str, default="celoe", choices=["celoe", "ocel", "evolearner", "nces", "tdl", "consyn"],
                         help="Available concept learning models.")
 
     # Knowledge graph related arguments
21 changes: 14 additions & 7 deletions ontolearn/abstracts.py
@@ -78,13 +78,20 @@ def score_elp(self, instances: set, learning_problem: EncodedLearningProblem) ->
         if len(instances) == 0:
             return False, 0
         # @TODO: It must be moved to the top of the abstracts.py
-        from ontolearn.learning_problem import EncodedPosNegLPStandard
-        if isinstance(learning_problem, EncodedPosNegLPStandard):
-            tp = len(learning_problem.kb_pos.intersection(instances))
-            tn = len(learning_problem.kb_neg.difference(instances))
-
-            fp = len(learning_problem.kb_neg.intersection(instances))
-            fn = len(learning_problem.kb_pos.difference(instances))
+        from ontolearn.learning_problem import EncodedPosNegLPStandard, PosNegLPStandard
+        if isinstance(learning_problem, (EncodedPosNegLPStandard, PosNegLPStandard)):
+            if isinstance(learning_problem, EncodedPosNegLPStandard):
+                pos = learning_problem.kb_pos
+                neg = learning_problem.kb_neg
+            else:
+                pos = learning_problem.pos
+                neg = learning_problem.neg
+
+            tp = len(pos.intersection(instances))
+            tn = len(neg.difference(instances))
+            fp = len(neg.intersection(instances))
+            fn = len(pos.difference(instances))
+
             return self.score2(tp=tp, tn=tn, fp=fp, fn=fn)
         else:
             raise NotImplementedError(learning_problem)
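For clarity, the set arithmetic above computes a standard confusion matrix over individuals; a minimal worked example (names hypothetical, and with F1 standing in for whatever metric `score2` implements):

```python
pos = {"anna", "heinz"}                 # positive examples
neg = {"markus"}                        # negative examples
instances = {"anna", "markus"}          # hypothetical retrieval of the candidate concept

tp = len(pos.intersection(instances))   # positives covered:     1 (anna)
tn = len(neg.difference(instances))     # negatives not covered: 0
fp = len(neg.intersection(instances))   # negatives covered:     1 (markus)
fn = len(pos.difference(instances))     # positives missed:      1 (heinz)

f1 = 2 * tp / (2 * tp + fp + fn)
print(tp, tn, fp, fn, f1)               # 1 0 1 1 0.5
```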
31 changes: 31 additions & 0 deletions ontolearn/consyn/__init__.py
@@ -0,0 +1,31 @@
# -----------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2024 Ontolearn Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------

# from .architecture import ConSynGeneratorModel
# from .grammar import ConSynGrammarParser
# from .inference import ConSynInference
# from .reward import ConSynRewardFunction
# from .trainer import ConSynTrainer
# from .tokenizer import ConSynTokenizer
# from .utils import *
61 changes: 61 additions & 0 deletions ontolearn/consyn/configs.py
@@ -0,0 +1,61 @@
import os

import torch

BASE_DATA_DIR = 'data'
os.makedirs(BASE_DATA_DIR, exist_ok=True)

CONFIG = {
    'seed': 42,
    'd_model': 128,
    'nhead': 1,
    'num_layers': 8,
    'dim_feedforward': 512,
    'use_checkpointing': True,
    'pre_norm': True,
    'dropout': 0.1,
    'learning_rate': 1e-4,
    'num_epochs': 300,
    'batch_size': 1,
    # 'max_global_seq_len': 1500,
    'max_output_seq_len': 30,
    'log_interval': 5,
    'grad_norm_clip': 1.0,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',

    'KNOWLEDGE_BASE_PATH': os.path.join(BASE_DATA_DIR, "KGs/Family/family-benchmark_rich_background.owl"),
    'LEARNING_PROBLEM_PATH': os.path.join(BASE_DATA_DIR, "LPs/Family/lps.json"),
    'split_dataset': True,

    # --- Data Augmentation Configuration ---
    # "apply_task_label_logical_aug": False,  # Global flag to enable/disable logical aug for task labels
    # "task_label_neg_sample_ratio": 0.0,  # Probability of negating an original task label (0.0 to 1.0)
    # "task_label_feat_aug_sample_ratio": 0.2,
    # "individual_feat_aug_sample_ratio": 0.5,  # Prob. an individual's features undergo logical content augmentation (0.0 to 1.0)
    # "apply_indv_feat_rand_aug": False,  # Global flag to enable/disable individual feature randomization (shuffling)
    # "indv_feat_sample_ratio_for_shuffle": 0.5,  # Ratio of individuals whose features will be considered for shuffling for randomization
    # "indv_feat_shuffle_ratio": 0.5,  # Ratio of features within a selected individual to shuffle for randomization (0.0 to 1.0)
    'num_dataloader_workers': 0
}

owl_path = CONFIG['KNOWLEDGE_BASE_PATH']
base_folder_name = os.path.basename(os.path.dirname(owl_path))
experiment_dir = os.path.join("experiments", base_folder_name.lower())

# if CONFIG["apply_task_label_logical_aug"]:
# experiment_dir = os.path.join(experiment_dir, "augment")

os.makedirs(experiment_dir, exist_ok=True)

expr_data = experiment_dir+'/data'

CONFIG['EXPERIMENT_DIR'] = experiment_dir
CONFIG['GENERATED_DATA_PATH'] = expr_data+'/generated_raw_data.json'
CONFIG['TASK_LABEL_MAPPING_PATH'] = expr_data+'/task_label_mappings.json'

expr_fit_data = expr_data+'/fit'

CONFIG['FIT_PATH'] = {
    'GENERATED_DATA_PATH': expr_fit_data + '/generated_raw_data.json',
    'TASK_LABEL_MAPPING_PATH': expr_fit_data + '/task_label_mappings.json'
}
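To make the derived layout concrete, here is a trace of the path construction above for the default Family knowledge base (values follow directly from the `os.path` calls; POSIX separators shown):

```python
import os

owl_path = "data/KGs/Family/family-benchmark_rich_background.owl"  # CONFIG['KNOWLEDGE_BASE_PATH']
base_folder_name = os.path.basename(os.path.dirname(owl_path))     # "Family"
experiment_dir = os.path.join("experiments", base_folder_name.lower())

print(experiment_dir)                                        # experiments/family
print(experiment_dir + "/data/generated_raw_data.json")      # GENERATED_DATA_PATH
print(experiment_dir + "/data/fit/generated_raw_data.json")  # FIT_PATH['GENERATED_DATA_PATH']
```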
74 changes: 74 additions & 0 deletions ontolearn/consyn/executor.py
@@ -0,0 +1,74 @@
import logging
from typing import Optional
from torch.optim import AdamW

from ontolearn.consyn.configs import CONFIG
from ontolearn.consyn.intializer import Initializer
from ontolearn.consyn.trainer import ConSynTrainer
from ontolearn.consyn.model.model import ConSynGeneratorModel

logger = logging.getLogger(__name__)


class ConSynExecutor:
    def __init__(self, kb_path, lps_path, num_k_predictions: int = 30, device: Optional[str] = None, verbose: bool = False):
        CONFIG['KNOWLEDGE_BASE_PATH'] = kb_path
        CONFIG['LEARNING_PROBLEM_PATH'] = lps_path

        if device is not None:
            CONFIG['device'] = device

        self.num_k_predictions = num_k_predictions
        self.verbose = verbose

        self.config = CONFIG
        self.device = CONFIG['device']

        if self.verbose:
            print(f"Using device: {self.device}\n")

        # Initialize core components
        initializer = Initializer(config=self.config, mode="fit", verbose=self.verbose)
        components = initializer.get_components()

        self.tokenizer = components['tokenizer']
        self.grammar_parser = components['grammar_parser']
        self.reward = components['reward']
        self.heuristic = components['heuristic']

        vocab_size = self.tokenizer.vocab_size

        # Initialize model
        self.model = ConSynGeneratorModel(
            tokenizer=self.tokenizer,
            input_vocab_size=vocab_size,
            target_vocab_size=vocab_size,
            embed_dim=self.config['d_model'],
            num_encoder_layers=self.config['num_layers'],
            num_decoder_layers=self.config['num_layers'],
            num_heads=self.config['nhead'],
            ff_dim=self.config['dim_feedforward'],
            dropout_prob=self.config['dropout'],
            num_segments=4,
            use_checkpointing=self.config['use_checkpointing'],
            pre_norm=self.config['pre_norm']
        ).to(self.device)

        # Optimizer and trainer
        optimizer = AdamW(self.model.parameters(), lr=1e-5)

        self.trainer = ConSynTrainer(
            model=self.model,
            tokenizer=self.tokenizer,
            grammar_parser=self.grammar_parser,
            reward_function=self.reward,
            heuristic_function=self.heuristic,
            optimizer=optimizer,
            device=self.device,
            num_k_predictions=self.num_k_predictions,
            max_gen_length=self.config['max_output_seq_len'],
            expr_save_path=self.config['EXPERIMENT_DIR'],
            verbose=self.verbose
        )

        self.trainer.prepare_for_fit(verbose=self.verbose)
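End-to-end, this executor is the only entry point the evaluation script needs; a minimal usage sketch, mirroring `examples/concept_learning_cv_evaluation.py` (paths hypothetical, and `kb`/`train_lp` assumed to be the usual Ontolearn knowledge base and learning-problem objects):

```python
# Invoked from the CLI via the existing flags, e.g.:
#   python examples/concept_learning_cv_evaluation.py --kb <kb.owl> --lps <lps.json> --learner_types consyn
from ontolearn.consyn.executor import ConSynExecutor

executor = ConSynExecutor(
    kb_path="KGs/Family/family-benchmark_rich_background.owl",  # hypothetical path
    lps_path="LPs/Family/lps.json",                             # hypothetical path
    num_k_predictions=50,
    device="cpu",
    verbose=True,
)
trainer = executor.trainer  # a ConSynTrainer, already prepared via prepare_for_fit()

# As in the evaluation script ("Aunt" is a hypothetical target concept):
# prediction = trainer.fit(knowledge_base=kb, target_concept="Aunt",
#                          target_concept_lp=train_lp,
#                          path=executor.config['FIT_PATH'],
#                          num_predictions=trainer.num_k_predictions).best_hypotheses()
```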