Skip to content

Commit

Permalink
Simplify hints using float for Union[int, float]
Browse files: browse the repository at this point in the history
  • Loading branch information
juhoinkinen committed May 24, 2023
1 parent d2c5e53 commit f242a98
Show file tree
Hide file tree
Showing 10 changed files with 28 additions and 34 deletions.
6 changes: 2 additions & 4 deletions annif/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,7 @@ def modification_time(self) -> Optional[datetime.datetime]:

def _get_backend_params(
self,
params: Optional[
Union[Dict[str, str], Dict[str, int], Dict[str, Union[float, int]]]
],
params: Optional[Union[Dict[str, str], Dict[str, int], Dict[str, float]]],
) -> Dict[str, Any]:
backend_params = dict(self.params)
if params is not None:
Expand All @@ -83,7 +81,7 @@ def _train(
def train(
self,
corpus: DocumentCorpus,
params: Optional[Union[Dict[str, Union[float, int]], Dict[str, int]]] = None,
params: Optional[Union[Dict[str, float], Dict[str, int]]] = None,
jobs: int = 0,
) -> None:
"""Train the model on the given document or subject corpus."""
Expand Down
2 changes: 1 addition & 1 deletion annif/backend/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _merge_source_batches(
)

def _suggest_batch(
self, texts: List[str], params: Dict[str, Union[int, float, str]]
self, texts: List[str], params: Dict[str, Union[float, str]]
) -> SuggestionBatch:
sources = annif.util.parse_sources(params["sources"])
batch_by_source = self._suggest_with_sources(texts, sources)
Expand Down
10 changes: 4 additions & 6 deletions annif/backend/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class FastTextBackend(mixins.ChunkingBackend, backend.AnnifBackend):
# defaults for uninitialized instances
_model = None

def default_params(self) -> Dict[str, Union[int, float, str]]:
def default_params(self) -> Dict[str, Union[float, str]]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(mixins.ChunkingBackend.DEFAULT_PARAMETERS)
params.update(self.DEFAULT_PARAMETERS)
Expand Down Expand Up @@ -119,9 +119,7 @@ def _create_train_file(
corpus, self.datadir, self.TRAIN_FILE, method=self._write_train_file
)

def _create_model(
self, params: Dict[str, Union[int, float, str]], jobs: int
) -> None:
def _create_model(self, params: Dict[str, Union[float, str]], jobs: int) -> None:
self.info("creating fastText model")
trainpath = os.path.join(self.datadir, self.TRAIN_FILE)
modelpath = os.path.join(self.datadir, self.MODEL_FILE)
Expand All @@ -139,7 +137,7 @@ def _create_model(
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, float, str]],
params: Dict[str, Union[float, str]],
jobs: int = 0,
) -> None:
if corpus != "cached":
Expand All @@ -165,7 +163,7 @@ def _predict_chunks(
)

def _suggest_chunks(
self, chunktexts: List[str], params: Dict[str, Union[int, float, str]]
self, chunktexts: List[str], params: Dict[str, Union[float, str]]
) -> List[SubjectSuggestion]:
limit = int(params["limit"])
chunklabels, chunkscores = self._predict_chunks(chunktexts, limit)
Expand Down
8 changes: 4 additions & 4 deletions annif/backend/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class MLLMBackend(hyperopt.AnnifHyperoptBackend):
def get_hp_optimizer(self, corpus: DocumentCorpus, metric: str) -> MLLMOptimizer:
return MLLMOptimizer(self, corpus, metric)

def default_params(self) -> Dict[str, Union[int, float, bool]]:
def default_params(self) -> Dict[str, Union[float, bool]]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params
Expand Down Expand Up @@ -124,7 +124,7 @@ def initialize(self, parallel: bool = False) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, float, bool, str]],
params: Dict[str, Union[float, bool, str]],
jobs: int = 0,
) -> None:
self.info("starting train")
Expand Down Expand Up @@ -158,15 +158,15 @@ def _generate_candidates(self, text: str) -> List[Union[Candidate, Any]]:
def _prediction_to_result(
self,
prediction: List[Union[Tuple[np.float64, int], Any]],
params: Dict[str, Union[int, float, bool, str]],
params: Dict[str, Union[float, bool, str]],
) -> Iterator[Any]:
vector = np.zeros(len(self.project.subjects), dtype=np.float32)
for score, subject_id in prediction:
vector[subject_id] = score
return vector_to_suggestions(vector, int(params["limit"]))

def _suggest(
self, text: str, params: Dict[str, Union[int, float, bool, str]]
self, text: str, params: Dict[str, Union[float, bool, str]]
) -> Iterator[Any]:
candidates = self._generate_candidates(text)
prediction = self._model.predict(candidates)
Expand Down
8 changes: 4 additions & 4 deletions annif/backend/nn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class NNEnsembleBackend(backend.AnnifLearningBackend, ensemble.BaseEnsembleBacke
# defaults for uninitialized instances
_model = None

def default_params(self) -> Dict[str, Union[int, float, str]]:
def default_params(self) -> Dict[str, Union[float, str]]:
params = backend.AnnifBackend.DEFAULT_PARAMETERS.copy()
params.update(self.DEFAULT_PARAMETERS)
return params
Expand Down Expand Up @@ -140,7 +140,7 @@ def _merge_source_batches(
self,
batch_by_source: Dict[str, SuggestionBatch],
sources: List[Tuple[str, float]],
params: Dict[str, Union[int, float, str]],
params: Dict[str, Union[float, str]],
) -> SuggestionBatch:
src_weight = dict(sources)
score_vectors = np.array(
Expand Down Expand Up @@ -199,7 +199,7 @@ def _create_model(self, sources: List[Tuple[str, float]]) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, float, str]],
params: Dict[str, Union[float, str]],
jobs: int = 0,
) -> None:
sources = annif.util.parse_sources(self.params["sources"])
Expand Down Expand Up @@ -286,7 +286,7 @@ def _fit_model(
def _learn(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, float, str]],
params: Dict[str, Union[float, str]],
) -> None:
self.initialize()
self._fit_model(
Expand Down
2 changes: 1 addition & 1 deletion annif/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def results(
metrics: Union[Tuple[str, str], Tuple[()], List[str]] = [],
results_file: Optional[Union[LazyFile, TextIOWrapper]] = None,
language: Optional[str] = None,
) -> Dict[str, Union[np.float64, float, int]]:
) -> Dict[str, Union[np.float64, float]]:
"""evaluate a set of selected subjects against a gold standard using
different metrics. If metrics is empty, use all available metrics.
If results_file (file object) given, write results per subject to it
Expand Down
12 changes: 6 additions & 6 deletions annif/lexical/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _candidates_to_features(self, candidates: List[Candidate]) -> np.ndarray:

@staticmethod
def _get_label_props(
params: Dict[str, Union[int, float, bool, str]]
params: Dict[str, Union[float, bool, str]]
) -> Tuple[List[URIRef], List[URIRef]]:
pref_label_props = [SKOS.prefLabel]

Expand All @@ -189,7 +189,7 @@ def _prepare_terms(
self,
graph: Graph,
vocab: AnnifVocabulary,
params: Dict[str, Union[int, float, bool, str]],
params: Dict[str, Union[float, bool, str]],
) -> Tuple[List[Term], List[int]]:
pref_label_props, nonpref_label_props = self._get_label_props(params)

Expand Down Expand Up @@ -220,7 +220,7 @@ def _prepare_train_index(
self,
vocab: AnnifVocabulary,
analyzer: Analyzer,
params: Dict[str, Union[int, float, bool, str]],
params: Dict[str, Union[float, bool, str]],
) -> List[int]:
graph = vocab.as_graph()
terms, subject_ids = self._prepare_terms(graph, vocab, params)
Expand Down Expand Up @@ -305,7 +305,7 @@ def prepare_train(
corpus: DocumentCorpus,
vocab: AnnifVocabulary,
analyzer: Analyzer,
params: Dict[str, Union[int, float, bool, str]],
params: Dict[str, Union[float, bool, str]],
n_jobs: int,
) -> Tuple[np.ndarray, np.ndarray]:
# create an index from the vocabulary terms
Expand All @@ -323,7 +323,7 @@ def prepare_train(
return (np.vstack(features), np.array(train_y))

def _create_classifier(
self, params: Dict[str, Union[int, float, bool, str]]
self, params: Dict[str, Union[float, bool, str]]
) -> BaggingClassifier:
return BaggingClassifier(
DecisionTreeClassifier(
Expand All @@ -337,7 +337,7 @@ def train(
self,
train_x: Union[np.ndarray, List[Tuple[int, int]]],
train_y: Union[List[bool], np.ndarray],
params: Dict[str, Union[int, float, bool, str]],
params: Dict[str, Union[float, bool, str]],
) -> None:
# fit the model on the training corpus
self._classifier = self._create_classifier(params)
Expand Down
4 changes: 2 additions & 2 deletions annif/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def _is_error(


def suggest(
project_id: str, body: Dict[str, Union[int, float, str]]
project_id: str, body: Dict[str, Union[float, str]]
) -> Union[
Dict[str, List[Any]],
Dict[str, List[Dict[str, Optional[Union[str, float]]]]],
Expand Down Expand Up @@ -174,7 +174,7 @@ def suggest_batch(
def _suggest(
project_id: str,
documents: List[Union[Dict[str, str], Any]],
parameters: Dict[str, Union[int, float, str]],
parameters: Dict[str, Union[float, str]],
) -> Union[
List[Dict[str, List[Any]]],
List[Dict[str, List[Dict[str, Optional[Union[str, float]]]]]],
Expand Down
6 changes: 3 additions & 3 deletions annif/suggestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import collections
import itertools
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
from typing import TYPE_CHECKING, Any, Iterator, List, Optional

import numpy as np
from scipy.sparse import csr_array
Expand All @@ -25,7 +25,7 @@ def vector_to_suggestions(vector: np.ndarray, limit: int) -> Iterator[Any]:
def filter_suggestion(
preds: csr_array,
limit: Optional[int] = None,
threshold: Union[int, float] = 0.0,
threshold: float = 0.0,
) -> csr_array:
"""filter a 2D sparse suggestion array (csr_array), retaining only the
top K suggestions with a score above or equal to the threshold for each
Expand Down Expand Up @@ -111,7 +111,7 @@ def from_sequence(

@classmethod
def from_averaged(
cls, batches: List[SuggestionBatch], weights: List[Union[int, float]]
cls, batches: List[SuggestionBatch], weights: List[float]
) -> SuggestionBatch:
"""Create a new SuggestionBatch where the subject scores are the
weighted average of scores in several SuggestionBatches"""
Expand Down
4 changes: 1 addition & 3 deletions annif/transform/inputlimiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
class InputLimiter(transform.BaseTransform):
name = "limit"

def __init__(
self, project: Optional[AnnifProject], input_limit: str
) -> None:
def __init__(self, project: Optional[AnnifProject], input_limit: str) -> None:
super().__init__(project)
self.input_limit = int(input_limit)
self._validate_value(self.input_limit)
Expand Down

0 comments on commit f242a98

Please sign in to comment.