Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TripleStore via rdflib.graph #364

Merged
merged 6 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ jobs:
- name: Test with pytest
run: |
wget https://files.dice-research.org/projects/Ontolearn/KGs.zip
unzip KGs.zip
wget https://files.dice-research.org/projects/Ontolearn/LPs.zip
unzip KGs.zip && unzip LPs.zip
pytest -p no:warnings -x
4 changes: 3 additions & 1 deletion ontolearn/base_concept_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def fit(self, *args, **kwargs):

Once finished, the results can be queried with the `best_hypotheses` function."""
pass

@abstractmethod
def best_hypotheses(self, n=10) -> Iterable[_N]:
"""Get the current best found hypotheses according to the quality.
Expand All @@ -205,6 +204,9 @@ def best_hypotheses(self, n=10) -> Iterable[_N]:

Returns:
Iterable with hypotheses in form of search tree nodes.

@TODO: We need to write a decorator for this function to convert each object into an instance of OWLClassExpression

"""
pass

Expand Down
66 changes: 35 additions & 31 deletions ontolearn/learners/drill.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from ontolearn.refinement_operators import LengthBasedRefinement
from ontolearn.abstracts import AbstractScorer, AbstractNode
from ontolearn.search import RL_State
from typing import Set, List, Tuple, Optional, Generator, SupportsFloat, Iterable, FrozenSet
from typing import Set, List, Tuple, Optional, Generator, SupportsFloat, Iterable, FrozenSet, Callable
from owlapy.model import OWLNamedIndividual, OWLClassExpression
from ontolearn.learning_problem import PosNegLPStandard, EncodedPosNegLPStandard
import torch
Expand All @@ -15,12 +15,15 @@
import dicee
import os
from owlapy.render import DLSyntaxObjectRenderer
# The F1 class will be deprecated in favor of the compute_f1_score function.
from ontolearn.metrics import F1
from ontolearn.utils.static_funcs import compute_f1_score
import random
from ontolearn.heuristics import CeloeBasedReward
import torch
from ontolearn.data_struct import PrepareBatchOfTraining, PrepareBatchOfPrediction


class Drill(RefinementBasedConceptLearner):
""" Neuro-Symbolic Class Expression Learning (https://www.ijcai.org/proceedings/2023/0403.pdf)"""

Expand All @@ -32,8 +35,8 @@ def __init__(self, knowledge_base,
use_data_properties=True,
use_card_restrictions=True,
card_limit=10,
nominals=True,
quality_func: AbstractScorer = None,
use_nominals=True,
quality_func: Callable = None,  # AbstractScorer will be deprecated.
reward_func: object = None,
batch_size=None, num_workers: int = 1, pretrained_model_name=None,
iter_bound=None, max_num_of_concepts_tested=None, verbose: int = 0, terminate_on_goal=None,
Expand Down Expand Up @@ -65,7 +68,7 @@ def __init__(self, knowledge_base,
use_card_restrictions=use_card_restrictions,
card_limit=card_limit,
use_inverse=use_inverse,
nominals=nominals)
use_nominals=use_nominals)
else:
refinement_operator = refinement_operator

Expand All @@ -74,6 +77,7 @@ def __init__(self, knowledge_base,
self.reward_func = CeloeBasedReward()
else:
self.reward_func = reward_func

# (4) Params.
self.num_workers = num_workers
self.learning_rate = learning_rate
Expand All @@ -93,7 +97,7 @@ def __init__(self, knowledge_base,
self.storage_path, _ = create_experiment_folder()
self.search_tree = DRILLSearchTreePriorityQueue()
self.renderer = DLSyntaxObjectRenderer()
self.stop_at_goal=stop_at_goal
self.stop_at_goal = stop_at_goal

if self.pre_trained_kge:
self.representation_mode = "averaging"
Expand All @@ -117,7 +121,7 @@ def __init__(self, knowledge_base,
else:
self.heuristic_func = CeloeBasedReward()
self.representation_mode = None

# @CD: RefinementBasedConceptLearner redefines a few attributes; this should be avoided.
RefinementBasedConceptLearner.__init__(self, knowledge_base=knowledge_base,
refinement_operator=refinement_operator,
quality_func=quality_func,
Expand All @@ -126,6 +130,9 @@ def __init__(self, knowledge_base,
iter_bound=iter_bound,
max_num_of_concepts_tested=max_num_of_concepts_tested,
max_runtime=max_runtime)
# CD: This setting of the variable will be removed later.
self.quality_func = compute_f1_score


def initialize_class_expression_learning_problem(self, pos: Set[OWLNamedIndividual], neg: Set[OWLNamedIndividual]):
"""
Expand All @@ -137,9 +144,9 @@ def initialize_class_expression_learning_problem(self, pos: Set[OWLNamedIndividu
self.clean()
assert 0 < len(pos) and 0 < len(neg)

# 1.
# 1. CD: PosNegLPStandard will be deprecated.
# Generate a Learning Problem
self.learning_problem = PosNegLPStandard(pos=set(pos), neg=set(neg)).encode_kb(self.kb)
self.learning_problem = PosNegLPStandard(pos=set(pos), neg=set(neg))
# 2. Obtain embeddings of positive and negative examples.
if self.pre_trained_kge is None:
self.emb_pos = None
Expand Down Expand Up @@ -175,7 +182,8 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
[i for i in chain.from_iterable((self.kb.get_types(ind, direct=True) for ind in learning_problem.neg))])
type_bias = pos_type_counts - neg_type_counts
# (1) Initialize learning problem
root_state = self.initialize_class_expression_learning_problem(pos=learning_problem.pos, neg=learning_problem.neg)
root_state = self.initialize_class_expression_learning_problem(pos=learning_problem.pos,
neg=learning_problem.neg)
# (2) Add root state into search tree
root_state.heuristic = root_state.quality
self.search_tree.add(root_state)
Expand All @@ -199,16 +207,13 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
# (2.1) If the next possible RL-state is not a dead end
# (2.1.) If the refinement of (1) is not equivalent to \bottom

if len(ref.instances):
# Compute quality
self.compute_quality_of_class_expression(ref)
if ref.quality == 0:
continue
next_possible_states.append(ref)

if self.stop_at_goal:
if ref.quality == 1.0:
break
self.compute_quality_of_class_expression(ref)
if ref.quality == 0:
continue
next_possible_states.append(ref)
if self.stop_at_goal:
if ref.quality == 1.0:
break
try:
assert len(next_possible_states) > 0
except AssertionError:
Expand Down Expand Up @@ -305,24 +310,23 @@ def init_training(self, pos_uri: Set[OWLNamedIndividual], neg_uri: Set[OWLNamedI
def create_rl_state(self, c: OWLClassExpression, parent_node: Optional[RL_State] = None,
is_root: bool = False) -> RL_State:
""" Create an RL_State instance."""
instances: Generator
instances = set(self.kb.individuals(c))
instances_bitset: FrozenSet[OWLNamedIndividual]
instances_bitset = self.kb.individuals_set(c)

if self.pre_trained_kge is not None:
raise NotImplementedError("No pre-trained knowledge")

rl_state = RL_State(c, parent_node=parent_node,
is_root=is_root,
instances=instances,
instances_bitset=instances_bitset, embeddings=None)
rl_state = RL_State(c, parent_node=parent_node, is_root=is_root)
rl_state.length = self.kb.concept_len(c)
return rl_state

def compute_quality_of_class_expression(self, state: RL_State) -> None:
""" Compute Quality of owl class expression."""
self.quality_func.apply(state, state.instances_bitset, self.learning_problem)
""" Compute Quality of owl class expression.
# (1) Perform concept retrieval
# (2) Compute the quality w.r.t. (1), positive and negative examples
# (3) Increment the number of tested concepts attribute.

"""
individuals = frozenset({i for i in self.kb.individuals(state.concept)})
quality = self.quality_func(individuals=individuals, pos=self.learning_problem.pos,
neg=self.learning_problem.neg)
state.quality=quality
self._number_of_tested_concepts += 1

def apply_refinement(self, rl_state: RL_State) -> Generator:
Expand Down
11 changes: 7 additions & 4 deletions ontolearn/refinement_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,18 @@
class LengthBasedRefinement(BaseRefinement):
""" A top-down refinement operator in ALC."""

def __init__(self, knowledge_base: KnowledgeBase, use_inverse=False,
use_data_properties=False, use_card_restrictions=False, card_limit=11, nominals=True):
def __init__(self, knowledge_base: KnowledgeBase,
use_inverse: bool = False,
use_data_properties: bool = False,
use_card_restrictions: bool = False,
card_limit=11, use_nominals: bool = True):
super().__init__(knowledge_base)

self.use_inverse = use_inverse
self.use_data_properties = use_data_properties
self.use_card_restrictions = use_card_restrictions
self.card_limit = card_limit
self.nominals = nominals
self.use_nominals = use_nominals

# 1. Number of named classes and sanity checking
num_of_named_classes = len(set(i for i in self.kb.ontology.classes_in_signature()))
Expand Down Expand Up @@ -112,7 +115,7 @@ def refine_top(self) -> Iterable:
self.kb.generator.max_cardinality_restriction(c, inverse_role, card),
self.kb.generator.exact_cardinality_restriction(c, inverse_role, card)])

if self.nominals:
if self.use_nominals:
temp = []
for i in restrictions:
for j in self.kb.individuals(i.get_filler()):
Expand Down
25 changes: 3 additions & 22 deletions ontolearn/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,29 +324,17 @@ def __str__(self):
class RL_State(_NodeConcept, _NodeQuality, _NodeHeuristic, AbstractNode, _NodeParentRef['RL_State']):
renderer: ClassVar[OWLObjectRenderer] = DLSyntaxObjectRenderer()
"""RL_State node."""
__slots__ = '_concept', '_quality', '_heuristic', \
'embeddings', 'individuals', \
'instances_bitset', 'length', 'instances', 'parent_node', 'is_root', '_parent_ref', '__weakref__'
__slots__ = '_concept', '_quality', '_heuristic', 'length','parent_node', 'is_root', '_parent_ref', '__weakref__'

def __init__(self, concept: OWLClassExpression, parent_node: Optional['RL_State'] = None, is_root: bool = False,
embeddings=None, instances: Set = None, instances_bitset: FrozenSet = None, length=None):
def __init__(self, concept: OWLClassExpression, parent_node: Optional['RL_State'] = None,
is_root: bool = False, length=None):
_NodeConcept.__init__(self, concept)
_NodeQuality.__init__(self)
_NodeHeuristic.__init__(self)
_NodeParentRef.__init__(self, parent_node=parent_node, is_root=is_root)

assert isinstance(instances, set), f"Instances must be a set {type(instances)}"
assert isinstance(instances_bitset, frozenset), "Instances must be a set"
# TODO: CD _NodeParentRef causes unintended results:
# Without using _NodeParentRef, one can reach the top class expression via recursive calling parent_node
# However, if one uses _NodeParentRef amd comments self.parent_node and self.is_root, we can reach T.
AbstractNode.__init__(self)
self.parent_node = parent_node
self.is_root = is_root

self.embeddings = embeddings # tensor
self.instances = instances # list
self.instances_bitset = instances_bitset # bitset
self.length = length
self.__sanity_checking()

Expand All @@ -356,18 +344,11 @@ def __sanity_checking(self):
assert self.parent_node

def __str__(self):

if self.instances is None:
s = 'Not Init.'
else:
s = len(self.instances)

return "\t".join((
AbstractNode.__str__(self),
_NodeConcept.__str__(self),
_NodeQuality.__str__(self),
_NodeHeuristic.__str__(self),
f'|Instance|:{s}',
f'Length:{self.length}',
))

Expand Down
Loading
Loading