Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix names #37

Merged
3 commits merged on
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion autointent/context/optimization_info/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from numpy.typing import NDArray
from pydantic import BaseModel, ConfigDict, Field

from autointent.custom_types import NodeType


class Artifact(BaseModel): ...

Expand Down Expand Up @@ -40,7 +42,7 @@ class PredictorArtifact(Artifact):


def validate_node_name(value: str) -> str:
if value in ["regexp", "retrieval", "scoring", "prediction"]:
if value in [NodeType.retrieval, NodeType.scoring, NodeType.prediction, NodeType.regexp]:
return value
msg = f"Unknown node_type: {value}. Expected one of ['regexp', 'retrieval', 'scoring', 'prediction']"
raise ValueError(msg)
Expand Down
17 changes: 7 additions & 10 deletions autointent/context/optimization_info/optimization_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from numpy.typing import NDArray

from autointent.configs.node import InferenceNodeConfig
from autointent.custom_types import NODE_TYPES, NodeType
from autointent.logger import get_logger

from .data_models import Artifact, Artifacts, RetrieverArtifact, ScorerArtifact, Trial, Trials, TrialsIds
Expand Down Expand Up @@ -71,32 +72,28 @@ def _get_best_artifact(self, node_type: str) -> RetrieverArtifact | ScorerArtifa
return self.artifacts.get_best_artifact(node_type, i_best)

def get_best_embedder(self) -> str:
best_retriever_artifact: RetrieverArtifact = self._get_best_artifact(node_type="retrieval") # type: ignore[assignment]
best_retriever_artifact: RetrieverArtifact = self._get_best_artifact(node_type=NodeType.retrieval) # type: ignore[assignment]
return best_retriever_artifact.embedder_name

def get_best_test_scores(self) -> NDArray[np.float64] | None:
best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type="scoring") # type: ignore[assignment]
best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring) # type: ignore[assignment]
return best_scorer_artifact.test_scores

def get_best_oos_scores(self) -> NDArray[np.float64] | None:
best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type="scoring") # type: ignore[assignment]
best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring) # type: ignore[assignment]
return best_scorer_artifact.oos_scores

def dump_evaluation_results(self) -> dict[str, dict[str, list[float]]]:
node_wise_metrics = {
node_type: self._get_metrics_values(node_type)
for node_type in ["regexp", "retrieval", "scoring", "prediction"]
}
node_wise_metrics = {node_type.value: self._get_metrics_values(node_type) for node_type in NODE_TYPES}
return {
"metrics": node_wise_metrics,
"configs": self.trials.model_dump(),
}

def get_inference_nodes_config(self) -> list[InferenceNodeConfig]:
node_types = ["regexp", "retrieval", "scoring", "prediction"]
trial_ids = [self._get_best_trial_idx(node_type) for node_type in node_types]
trial_ids = [self._get_best_trial_idx(node_type) for node_type in NODE_TYPES]
res = []
for idx, node_type in zip(trial_ids, node_types, strict=True):
for idx, node_type in zip(trial_ids, NODE_TYPES, strict=True):
if idx is None:
continue
trial = self.trials.get_trial(node_type, idx)
Expand Down
10 changes: 10 additions & 0 deletions autointent/custom_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,13 @@ class LogLevel(Enum):

class BaseMetadataDict(TypedDict):
pass


class NodeType(str, Enum):
    """Identifiers for the four node types: retrieval, prediction, scoring, regexp.

    The ``str`` mix-in makes every member compare equal to its plain string
    value (``NodeType.scoring == "scoring"``), so code that still passes raw
    strings keeps matching.
    """

    retrieval = "retrieval"
    prediction = "prediction"
    scoring = "scoring"
    regexp = "regexp"


# Every node type, listed in the same order as the enum members are declared.
NODE_TYPES = [NodeType.retrieval, NodeType.prediction, NodeType.scoring, NodeType.regexp]
2 changes: 1 addition & 1 deletion autointent/generation/prompt_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
class PromptDescription(BaseModel):
text: str = PROMPT_DESCRIPTION

@field_validator("text")
@classmethod
@field_validator("text")
def check_valid_prompt(cls, value: str) -> str:
if value.find("{intent_name}") == -1 or value.find("{user_utterances}") == -1:
text_error = (
Expand Down
45 changes: 21 additions & 24 deletions autointent/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import TypeVar

from .base import Module
from .prediction import (
ArgmaxPredictor,
Expand All @@ -10,36 +12,31 @@
from .retrieval import RetrievalModule, VectorDBModule
from .scoring import DescriptionScorer, DNNCScorer, KNNScorer, LinearScorer, MLKnnScorer, ScoringModule

RETRIEVAL_MODULES_MULTICLASS: dict[str, type[Module]] = {
"vector_db": VectorDBModule,
}
T = TypeVar("T", bound=Module)


def create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
    """Build a lookup table that maps each module class's ``name`` to the class.

    Args:
        modules: Module classes, each exposing a class-level ``name`` attribute.

    Returns:
        A dict keyed by ``module.name``. If two classes share the same name,
        the one that appears later in ``modules`` replaces the earlier entry.
    """
    return {module.name: module for module in modules}


RETRIEVAL_MODULES_MULTICLASS: dict[str, type[Module]] = create_modules_dict([VectorDBModule])

RETRIEVAL_MODULES_MULTILABEL = RETRIEVAL_MODULES_MULTICLASS

SCORING_MODULES_MULTICLASS: dict[str, type[ScoringModule]] = {
"dnnc": DNNCScorer,
"knn": KNNScorer,
"linear": LinearScorer,
"description": DescriptionScorer,
}
SCORING_MODULES_MULTICLASS: dict[str, type[ScoringModule]] = create_modules_dict(
[DNNCScorer, KNNScorer, LinearScorer, DescriptionScorer]
)

SCORING_MODULES_MULTILABEL: dict[str, type[ScoringModule]] = create_modules_dict(
[MLKnnScorer, LinearScorer, DescriptionScorer]
)

SCORING_MODULES_MULTILABEL: dict[str, type[ScoringModule]] = {
"knn": KNNScorer,
"linear": LinearScorer,
"mlknn": MLKnnScorer,
}
PREDICTION_MODULES_MULTICLASS: dict[str, type[Module]] = create_modules_dict(
[ArgmaxPredictor, JinoosPredictor, ThresholdPredictor, TunablePredictor]
)

PREDICTION_MODULES_MULTICLASS: dict[str, type[Module]] = {
"argmax": ArgmaxPredictor,
"jinoos": JinoosPredictor,
"threshold": ThresholdPredictor,
"tunable": TunablePredictor,
}
PREDICTION_MODULES_MULTILABEL: dict[str, type[Module]] = create_modules_dict([ThresholdPredictor, TunablePredictor])

PREDICTION_MODULES_MULTILABEL: dict[str, type[Module]] = {
"threshold": ThresholdPredictor,
"tunable": TunablePredictor,
}
__all__ = [
"Module",
"ArgmaxPredictor",
Expand Down
2 changes: 2 additions & 0 deletions autointent/modules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@


class Module(ABC):
name: str

metadata_dict_name: str = "metadata.json"
metadata: BaseMetadataDict

Expand Down
2 changes: 2 additions & 0 deletions autointent/modules/prediction/argmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@

class ArgmaxPredictor(PredictionModule):
metadata = {} # noqa: RUF012
name = "argmax"

def __init__(self) -> None:
pass


@classmethod
def from_context(cls, context: Context) -> Self:
return cls()
Expand Down
1 change: 1 addition & 0 deletions autointent/modules/prediction/jinoos.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class JinoosPredictorDumpMetadata(BaseMetadataDict):

class JinoosPredictor(PredictionModule):
thresh: float
name = "jinoos"

def __init__(
self,
Expand Down
1 change: 1 addition & 0 deletions autointent/modules/prediction/threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class ThresholdPredictor(PredictionModule):
metadata: ThresholdPredictorDumpMetadata
multilabel: bool
tags: list[Tag] | None
name = "threshold"

def __init__(
self,
Expand Down
2 changes: 2 additions & 0 deletions autointent/modules/prediction/tunable.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class TunablePredictorDumpMetadata(BaseMetadataDict):


class TunablePredictor(PredictionModule):
name = "tunable"

def __init__(
self,
n_trials: int = 320,
Expand Down
2 changes: 2 additions & 0 deletions autointent/modules/regexp.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class RegexPatternsCompiled(TypedDict):


class RegExp(Module):
name = "regexp"

def __init__(self, regexp_patterns: list[RegexPatterns]) -> None:
self.regexp_patterns = regexp_patterns

Expand Down
1 change: 1 addition & 0 deletions autointent/modules/retrieval/vectordb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class VectorDBMetadata(BaseMetadataDict):

class VectorDBModule(RetrievalModule):
vector_index: VectorIndex
name = "vector_db"

def __init__(
self,
Expand Down
4 changes: 3 additions & 1 deletion autointent/modules/scoring/description/description.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from autointent.context import Context
from autointent.context.embedder import Embedder
from autointent.context.vector_index_client import VectorIndexClient
from autointent.context.vector_index_client import VectorIndex, VectorIndexClient
from autointent.context.vector_index_client.cache import get_db_dir
from autointent.custom_types import LabelType
from autointent.modules.scoring.base import ScoringModule
Expand All @@ -29,6 +29,8 @@ class DescriptionScorer(ScoringModule):
embedder: Embedder
precomputed_embeddings: bool = False
embedding_model_subdir: str = "embedding_model"
_vector_index: VectorIndex
name = "description"

def __init__(
self,
Expand Down
2 changes: 2 additions & 0 deletions autointent/modules/scoring/dnnc/dnnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class DNNCScorer(ScoringModule):
- inspect batch size of model.predict?
"""

name = "dnnc"

crossencoder_subdir: str = "crossencoder"
model: CrossEncoder | CrossEncoderWithLogreg
prebuilt_index: bool = False
Expand Down
1 change: 1 addition & 0 deletions autointent/modules/scoring/knn/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class KNNScorerDumpMetadata(BaseMetadataDict):
class KNNScorer(ScoringModule):
weights: WEIGHT_TYPES
_vector_index: VectorIndex
name = "knn"
prebuilt_index: bool = False

def __init__(
Expand Down
1 change: 1 addition & 0 deletions autointent/modules/scoring/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class LinearScorer(ScoringModule):
embedding_model_subdir: str = "embedding_model"
precomputed_embeddings: bool = False
db_dir: str
name = "linear"

def __init__(
self,
Expand Down
1 change: 1 addition & 0 deletions autointent/modules/scoring/mlknn/mlknn.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class MLKnnScorer(ScoringModule):
arrays_filename: str = "probs.npz"
metadata: MLKnnScorerDumpMetadata
prebuilt_index: bool = False
name = "mlknn"

def __init__(
self,
Expand Down
8 changes: 5 additions & 3 deletions autointent/nodes/nodes_info/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from autointent.custom_types import NodeType

from .base import NodeInfo
from .prediction import PredictionNodeInfo
from .retrieval import RetrievalNodeInfo
from .scoring import ScoringNodeInfo

NODES_INFO: dict[str, NodeInfo] = {
"retrieval": RetrievalNodeInfo(),
"scoring": ScoringNodeInfo(),
"prediction": PredictionNodeInfo(),
NodeType.retrieval: RetrievalNodeInfo(),
NodeType.scoring: ScoringNodeInfo(),
NodeType.prediction: PredictionNodeInfo(),
}

__all__ = ["NodeInfo", "PredictionNodeInfo", "RetrievalNodeInfo", "ScoringNodeInfo", "NODES_INFO"]
3 changes: 2 additions & 1 deletion autointent/nodes/nodes_info/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from collections.abc import Mapping
from typing import ClassVar

from autointent.custom_types import NodeType
from autointent.metrics import METRIC_FN
from autointent.modules import Module


class NodeInfo:
metrics_available: ClassVar[Mapping[str, METRIC_FN]]
modules_available: ClassVar[Mapping[str, type[Module]]]
node_type: str
node_type: NodeType
3 changes: 2 additions & 1 deletion autointent/nodes/nodes_info/prediction.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Mapping
from typing import ClassVar

from autointent.custom_types import NodeType
from autointent.metrics import PREDICTION_METRICS_MULTICLASS, PREDICTION_METRICS_MULTILABEL, PredictionMetricFn
from autointent.modules import PREDICTION_MODULES_MULTICLASS, PREDICTION_MODULES_MULTILABEL, Module

Expand All @@ -14,4 +15,4 @@ class PredictionNodeInfo(NodeInfo):

modules_available: ClassVar[dict[str, type[Module]]] = PREDICTION_MODULES_MULTICLASS | PREDICTION_MODULES_MULTILABEL

node_type = "prediction"
node_type = NodeType.prediction
5 changes: 3 additions & 2 deletions autointent/nodes/nodes_info/regexp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Mapping
from typing import ClassVar

from autointent.custom_types import NodeType
from autointent.metrics import regexp_partial_accuracy, regexp_partial_precision
from autointent.metrics.regexp import RegexpMetricFn
from autointent.modules import Module, RegExp
Expand All @@ -15,6 +16,6 @@ class RegExpNode(NodeInfo):
regexp_partial_precision,
)

modules_available: ClassVar[Mapping[str, type[Module]]] = {"regexp": RegExp}
modules_available: ClassVar[Mapping[str, type[Module]]] = {NodeType.regexp: RegExp}

node_type = "regexp"
node_type = NodeType.regexp
3 changes: 2 additions & 1 deletion autointent/nodes/nodes_info/retrieval.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Mapping
from typing import ClassVar

from autointent.custom_types import NodeType
from autointent.metrics import (
RETRIEVAL_METRICS_MULTICLASS,
RETRIEVAL_METRICS_MULTILABEL,
Expand All @@ -20,4 +21,4 @@ class RetrievalNodeInfo(NodeInfo):
RETRIEVAL_MODULES_MULTICLASS | RETRIEVAL_MODULES_MULTILABEL
)

node_type = "retrieval"
node_type = NodeType.retrieval
3 changes: 2 additions & 1 deletion autointent/nodes/nodes_info/scoring.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Mapping
from typing import ClassVar

from autointent.custom_types import NodeType
from autointent.metrics import SCORING_METRICS_MULTICLASS, SCORING_METRICS_MULTILABEL, ScoringMetricFn
from autointent.modules import SCORING_MODULES_MULTICLASS, SCORING_MODULES_MULTILABEL, ScoringModule

Expand All @@ -14,4 +15,4 @@ class ScoringNodeInfo(NodeInfo):
SCORING_MODULES_MULTICLASS | SCORING_MODULES_MULTILABEL
)

node_type = "scoring"
node_type = NodeType.scoring
6 changes: 3 additions & 3 deletions autointent/pipeline/inference/inference_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from hydra.utils import instantiate

from autointent.configs.inference_pipeline import InferencePipelineConfig
from autointent.custom_types import LabelType
from autointent.custom_types import LabelType, NodeType
from autointent.nodes.inference import InferenceNode


Expand All @@ -16,8 +16,8 @@ def from_dict_config(cls, config: dict[str, Any]) -> "InferencePipeline":
return instantiate(InferencePipelineConfig, **config) # type: ignore[no-any-return]

def predict(self, utterances: list[str]) -> list[LabelType]:
scores = self.nodes["scoring"].module.predict(utterances)
return self.nodes["prediction"].module.predict(scores) # type: ignore[return-value]
scores = self.nodes[NodeType.scoring].module.predict(utterances)
return self.nodes[NodeType.prediction].module.predict(scores) # type: ignore[return-value]

def fit(self, utterances: list[str], labels: list[LabelType]) -> None:
pass
3 changes: 2 additions & 1 deletion autointent/pipeline/optimization/pipeline_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from autointent import Context
from autointent.configs.pipeline_optimizer import PipelineOptimizerConfig
from autointent.custom_types import NodeType
from autointent.nodes import NodeOptimizer

from .utils import NumpyEncoder
Expand Down Expand Up @@ -70,7 +71,7 @@ def dump(self, logs_dir: str | Path | None) -> None:
yaml.dump(inference_config, file)


def make_report(logs: dict[str, Any], nodes: list[str]) -> str:
def make_report(logs: dict[str, Any], nodes: list[NodeType]) -> str:
ids = [np.argmax(logs["metrics"][node]) for node in nodes]
configs = []
for i, node in zip(ids, nodes, strict=False):
Expand Down
Loading