Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TripleStore via rdflib.graph #364

Merged
merged 6 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ jobs:
- name: Test with pytest
run: |
wget https://files.dice-research.org/projects/Ontolearn/KGs.zip
unzip KGs.zip
wget https://files.dice-research.org/projects/Ontolearn/LPs.zip
unzip KGs.zip && unzip LPs.zip
pytest -p no:warnings -x
4 changes: 3 additions & 1 deletion ontolearn/base_concept_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def fit(self, *args, **kwargs):

Once finished, the results can be queried with the `best_hypotheses` function."""
pass

@abstractmethod
def best_hypotheses(self, n=10) -> Iterable[_N]:
"""Get the current best found hypotheses according to the quality.
Expand All @@ -205,6 +204,9 @@ def best_hypotheses(self, n=10) -> Iterable[_N]:

Returns:
Iterable with hypotheses in form of search tree nodes.

@TODO: We need to write a decorator for this function to convert each object into an instance of OWLClassExpression

"""
pass

Expand Down
66 changes: 35 additions & 31 deletions ontolearn/learners/drill.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ontolearn.refinement_operators import LengthBasedRefinement
from ontolearn.abstracts import AbstractScorer, AbstractNode
from ontolearn.search import RL_State
from typing import Set, List, Tuple, Optional, Generator, SupportsFloat, Iterable, FrozenSet
from typing import Set, List, Tuple, Optional, Generator, SupportsFloat, Iterable, FrozenSet, Callable
from owlapy.model import OWLNamedIndividual, OWLClassExpression
from ontolearn.learning_problem import PosNegLPStandard, EncodedPosNegLPStandard
import torch
Expand All @@ -15,12 +15,15 @@
import dicee
import os
from owlapy.render import DLSyntaxObjectRenderer
# The F1 class will be deprecated in favor of the compute_f1_score function.
from ontolearn.metrics import F1
from ontolearn.utils.static_funcs import compute_f1_score
import random
from ontolearn.heuristics import CeloeBasedReward
import torch
from ontolearn.data_struct import PrepareBatchOfTraining, PrepareBatchOfPrediction


class Drill(RefinementBasedConceptLearner):
""" Neuro-Symbolic Class Expression Learning (https://www.ijcai.org/proceedings/2023/0403.pdf)"""

Expand All @@ -32,8 +35,8 @@ def __init__(self, knowledge_base,
use_data_properties=True,
use_card_restrictions=True,
card_limit=10,
nominals=True,
quality_func: AbstractScorer = None,
use_nominals=True,
quality_func: Callable = None,  # AbstractScorer will be deprecated.
reward_func: object = None,
batch_size=None, num_workers: int = 1, pretrained_model_name=None,
iter_bound=None, max_num_of_concepts_tested=None, verbose: int = 0, terminate_on_goal=None,
Expand Down Expand Up @@ -65,7 +68,7 @@ def __init__(self, knowledge_base,
use_card_restrictions=use_card_restrictions,
card_limit=card_limit,
use_inverse=use_inverse,
nominals=nominals)
use_nominals=use_nominals)
else:
refinement_operator = refinement_operator

Expand All @@ -74,6 +77,7 @@ def __init__(self, knowledge_base,
self.reward_func = CeloeBasedReward()
else:
self.reward_func = reward_func

# (4) Params.
self.num_workers = num_workers
self.learning_rate = learning_rate
Expand All @@ -93,7 +97,7 @@ def __init__(self, knowledge_base,
self.storage_path, _ = create_experiment_folder()
self.search_tree = DRILLSearchTreePriorityQueue()
self.renderer = DLSyntaxObjectRenderer()
self.stop_at_goal=stop_at_goal
self.stop_at_goal = stop_at_goal

if self.pre_trained_kge:
self.representation_mode = "averaging"
Expand All @@ -117,7 +121,7 @@ def __init__(self, knowledge_base,
else:
self.heuristic_func = CeloeBasedReward()
self.representation_mode = None

# @CD: RefinementBasedConceptLearner redefines a few attributes; this should be avoided.
RefinementBasedConceptLearner.__init__(self, knowledge_base=knowledge_base,
refinement_operator=refinement_operator,
quality_func=quality_func,
Expand All @@ -126,6 +130,9 @@ def __init__(self, knowledge_base,
iter_bound=iter_bound,
max_num_of_concepts_tested=max_num_of_concepts_tested,
max_runtime=max_runtime)
# CD: This setting of the variable will be removed later.
self.quality_func = compute_f1_score


def initialize_class_expression_learning_problem(self, pos: Set[OWLNamedIndividual], neg: Set[OWLNamedIndividual]):
"""
Expand All @@ -137,9 +144,9 @@ def initialize_class_expression_learning_problem(self, pos: Set[OWLNamedIndividu
self.clean()
assert 0 < len(pos) and 0 < len(neg)

# 1.
# 1. CD: PosNegLPStandard will be deprecated.
# Generate a Learning Problem
self.learning_problem = PosNegLPStandard(pos=set(pos), neg=set(neg)).encode_kb(self.kb)
self.learning_problem = PosNegLPStandard(pos=set(pos), neg=set(neg))
# 2. Obtain embeddings of positive and negative examples.
if self.pre_trained_kge is None:
self.emb_pos = None
Expand Down Expand Up @@ -175,7 +182,8 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
[i for i in chain.from_iterable((self.kb.get_types(ind, direct=True) for ind in learning_problem.neg))])
type_bias = pos_type_counts - neg_type_counts
# (1) Initialize learning problem
root_state = self.initialize_class_expression_learning_problem(pos=learning_problem.pos, neg=learning_problem.neg)
root_state = self.initialize_class_expression_learning_problem(pos=learning_problem.pos,
neg=learning_problem.neg)
# (2) Add root state into search tree
root_state.heuristic = root_state.quality
self.search_tree.add(root_state)
Expand All @@ -199,16 +207,13 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
# (2.1) If the next possible RL-state is not a dead end
# (2.1.) If the refinement of (1) is not equivalent to \bottom

if len(ref.instances):
# Compute quality
self.compute_quality_of_class_expression(ref)
if ref.quality == 0:
continue
next_possible_states.append(ref)

if self.stop_at_goal:
if ref.quality == 1.0:
break
self.compute_quality_of_class_expression(ref)
if ref.quality == 0:
continue
next_possible_states.append(ref)
if self.stop_at_goal:
if ref.quality == 1.0:
break
try:
assert len(next_possible_states) > 0
except AssertionError:
Expand Down Expand Up @@ -305,24 +310,23 @@ def init_training(self, pos_uri: Set[OWLNamedIndividual], neg_uri: Set[OWLNamedI
def create_rl_state(self, c: OWLClassExpression, parent_node: Optional[RL_State] = None,
is_root: bool = False) -> RL_State:
""" Create an RL_State instance."""
instances: Generator
instances = set(self.kb.individuals(c))
instances_bitset: FrozenSet[OWLNamedIndividual]
instances_bitset = self.kb.individuals_set(c)

if self.pre_trained_kge is not None:
raise NotImplementedError("No pre-trained knowledge")

rl_state = RL_State(c, parent_node=parent_node,
is_root=is_root,
instances=instances,
instances_bitset=instances_bitset, embeddings=None)
rl_state = RL_State(c, parent_node=parent_node, is_root=is_root)
rl_state.length = self.kb.concept_len(c)
return rl_state

def compute_quality_of_class_expression(self, state: RL_State) -> None:
""" Compute Quality of owl class expression."""
self.quality_func.apply(state, state.instances_bitset, self.learning_problem)
""" Compute Quality of owl class expression.
# (1) Perform concept retrieval
# (2) Compute the quality w.r.t. (1), positive and negative examples
# (3) Increment the number of tested concepts attribute.

"""
individuals = frozenset({i for i in self.kb.individuals(state.concept)})
quality = self.quality_func(individuals=individuals, pos=self.learning_problem.pos,
neg=self.learning_problem.neg)
state.quality=quality
self._number_of_tested_concepts += 1

def apply_refinement(self, rl_state: RL_State) -> Generator:
Expand Down
11 changes: 7 additions & 4 deletions ontolearn/refinement_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,18 @@
class LengthBasedRefinement(BaseRefinement):
""" A top-down refinement operator in ALC."""

def __init__(self, knowledge_base: KnowledgeBase, use_inverse=False,
use_data_properties=False, use_card_restrictions=False, card_limit=11, nominals=True):
def __init__(self, knowledge_base: KnowledgeBase,
use_inverse: bool = False,
use_data_properties: bool = False,
use_card_restrictions: bool = False,
card_limit=11, use_nominals: bool = True):
super().__init__(knowledge_base)

self.use_inverse = use_inverse
self.use_data_properties = use_data_properties
self.use_card_restrictions = use_card_restrictions
self.card_limit = card_limit
self.nominals = nominals
self.use_nominals = use_nominals

# 1. Number of named classes and sanity checking
num_of_named_classes = len(set(i for i in self.kb.ontology.classes_in_signature()))
Expand Down Expand Up @@ -112,7 +115,7 @@ def refine_top(self) -> Iterable:
self.kb.generator.max_cardinality_restriction(c, inverse_role, card),
self.kb.generator.exact_cardinality_restriction(c, inverse_role, card)])

if self.nominals:
if self.use_nominals:
temp = []
for i in restrictions:
for j in self.kb.individuals(i.get_filler()):
Expand Down
25 changes: 3 additions & 22 deletions ontolearn/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,29 +324,17 @@ def __str__(self):
class RL_State(_NodeConcept, _NodeQuality, _NodeHeuristic, AbstractNode, _NodeParentRef['RL_State']):
renderer: ClassVar[OWLObjectRenderer] = DLSyntaxObjectRenderer()
"""RL_State node."""
__slots__ = '_concept', '_quality', '_heuristic', \
'embeddings', 'individuals', \
'instances_bitset', 'length', 'instances', 'parent_node', 'is_root', '_parent_ref', '__weakref__'
__slots__ = '_concept', '_quality', '_heuristic', 'length','parent_node', 'is_root', '_parent_ref', '__weakref__'

def __init__(self, concept: OWLClassExpression, parent_node: Optional['RL_State'] = None, is_root: bool = False,
embeddings=None, instances: Set = None, instances_bitset: FrozenSet = None, length=None):
def __init__(self, concept: OWLClassExpression, parent_node: Optional['RL_State'] = None,
is_root: bool = False, length=None):
_NodeConcept.__init__(self, concept)
_NodeQuality.__init__(self)
_NodeHeuristic.__init__(self)
_NodeParentRef.__init__(self, parent_node=parent_node, is_root=is_root)

assert isinstance(instances, set), f"Instances must be a set {type(instances)}"
assert isinstance(instances_bitset, frozenset), "Instances must be a set"
# TODO: CD _NodeParentRef causes unintended results:
# Without using _NodeParentRef, one can reach the top class expression via recursive calling parent_node
# However, if one uses _NodeParentRef amd comments self.parent_node and self.is_root, we can reach T.
AbstractNode.__init__(self)
self.parent_node = parent_node
self.is_root = is_root

self.embeddings = embeddings # tensor
self.instances = instances # list
self.instances_bitset = instances_bitset # bitset
self.length = length
self.__sanity_checking()

Expand All @@ -356,18 +344,11 @@ def __sanity_checking(self):
assert self.parent_node

def __str__(self):

if self.instances is None:
s = 'Not Init.'
else:
s = len(self.instances)

return "\t".join((
AbstractNode.__str__(self),
_NodeConcept.__str__(self),
_NodeQuality.__str__(self),
_NodeHeuristic.__str__(self),
f'|Instance|:{s}',
f'Length:{self.length}',
))

Expand Down
Loading
Loading