Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Active Learning #130

Merged
merged 32 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
4bcd493
Add scratch of basic modules and wrappers around Roboflow API needed …
PawelPeczek-Roboflow Oct 24, 2023
9c58047
Add basic functionalities to enable active learning
PawelPeczek-Roboflow Oct 24, 2023
da89a16
Add minimal working version of AL feature
PawelPeczek-Roboflow Oct 25, 2023
2f621d1
Add more fine-grained accounting of limits
PawelPeczek-Roboflow Oct 25, 2023
7319840
Fix bugs spotted while testing
PawelPeczek-Roboflow Oct 25, 2023
ca0d9d1
Change caching strategy to avoid locking too much during the execution
PawelPeczek-Roboflow Oct 26, 2023
315b3b1
Refactor decorator
PawelPeczek-Roboflow Oct 26, 2023
70e9507
Add background task pool execution AL manager for FastAPI interface in…
PawelPeczek-Roboflow Oct 26, 2023
d4174c2
Add ThreadingActiveLearningMiddleware with thread-based registration
PawelPeczek-Roboflow Oct 26, 2023
1d307d2
Add Active Learning middleware to stream interfaces
PawelPeczek-Roboflow Oct 26, 2023
bbd7413
Fix issues spotted while stream interface testing
PawelPeczek-Roboflow Oct 26, 2023
674b9ed
Add more granular expiration periods and fix tests
PawelPeczek-Roboflow Oct 26, 2023
2cf05de
Fix isort
PawelPeczek-Roboflow Oct 26, 2023
caa54a7
Separate middlewares from active learning core
PawelPeczek-Roboflow Oct 26, 2023
9a17bdd
Add tests for active learning utils
PawelPeczek-Roboflow Oct 26, 2023
091c91a
Add tests for new functions to call Roboflow API
PawelPeczek-Roboflow Oct 26, 2023
8bb795d
Add tests for new functions to call Roboflow API
PawelPeczek-Roboflow Oct 26, 2023
755e0c9
Make api client more robust on missing key
PawelPeczek-Roboflow Oct 26, 2023
d24c866
Add tests for sampling
PawelPeczek-Roboflow Oct 27, 2023
65ea755
Merge branch 'main' into feature/introduce_active_learning
PawelPeczek-Roboflow Oct 27, 2023
f52c851
Add tests for encode_prediction()
PawelPeczek-Roboflow Oct 27, 2023
46b2b2d
Add tests for post_processing module
PawelPeczek-Roboflow Oct 27, 2023
a912b82
Add tests for entities module
PawelPeczek-Roboflow Oct 27, 2023
d2b7514
Add tests to active learning core module
PawelPeczek-Roboflow Oct 27, 2023
deccb88
Add tests for active learning configuration module
PawelPeczek-Roboflow Oct 27, 2023
7273b6a
Add tests for cache operations
PawelPeczek-Roboflow Oct 27, 2023
29d55ee
Add test case that covers situation when there is no limit for certai…
PawelPeczek-Roboflow Oct 27, 2023
5b4d914
Add tests for active learning batching
PawelPeczek-Roboflow Oct 27, 2023
c675807
Add tests for active learning accounting
PawelPeczek-Roboflow Oct 27, 2023
385a154
Add tests for standard middleware
PawelPeczek-Roboflow Oct 27, 2023
5cf969e
Add tests for threading middleware
PawelPeczek-Roboflow Oct 27, 2023
5519072
Resolve conflicts with main
PawelPeczek-Roboflow Oct 30, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions docker/config/cpu_http.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from inference.core.cache import cache
from inference.core.interfaces.http.http_api import HttpInterface
from inference.core.managers.active_learning import ActiveLearningManager, BackgroundTaskActiveLearningManager
from inference.core.managers.base import ModelManager
from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
from inference.core.registries.roboflow import (
Expand All @@ -7,17 +9,27 @@
import os
from prometheus_fastapi_instrumentator import Instrumentator

from inference.core.env import MAX_ACTIVE_MODELS
from inference.core.env import MAX_ACTIVE_MODELS, ACTIVE_LEARNING_ENABLED, LAMBDA
from inference.models.utils import ROBOFLOW_MODEL_TYPES

model_registry = RoboflowModelRegistry(ROBOFLOW_MODEL_TYPES)

if ACTIVE_LEARNING_ENABLED:
if LAMBDA:
model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
else:
model_manager = BackgroundTaskActiveLearningManager(model_registry=model_registry, cache=cache)
else:
model_manager = ModelManager(model_registry=model_registry)

model_manager = WithFixedSizeCache(
ModelManager(model_registry), max_size=MAX_ACTIVE_MODELS
model_manager,
max_size=MAX_ACTIVE_MODELS
)

model_manager.model_manager.init_pingback()
interface = HttpInterface(model_manager)
app = interface.app

# Setup Prometheus scraping endpoint at /metrics
# More info: https://github.com/trallnag/prometheus-fastapi-instrumentator
if os.environ.get("ENABLE_PROMETHEUS", False):
Expand Down
15 changes: 13 additions & 2 deletions docker/config/trt_http.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
from prometheus_fastapi_instrumentator import Instrumentator

from inference.core.env import MAX_ACTIVE_MODELS
from inference.core.cache import cache
from inference.core.env import MAX_ACTIVE_MODELS, ACTIVE_LEARNING_ENABLED, LAMBDA
from inference.core.interfaces.http.http_api import HttpInterface
from inference.core.managers.active_learning import ActiveLearningManager
from inference.core.managers.base import ModelManager
from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
from inference.core.registries.roboflow import (
Expand All @@ -12,8 +14,17 @@


model_registry = RoboflowModelRegistry(ROBOFLOW_MODEL_TYPES)

if ACTIVE_LEARNING_ENABLED:
if LAMBDA:
model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
else:
model_manager = ActiveLearningManager(model_registry=model_registry, cache=cache)
else:
model_manager = ModelManager(model_registry=model_registry)

model_manager = WithFixedSizeCache(
ModelManager(model_registry), max_size=MAX_ACTIVE_MODELS
model_manager, max_size=MAX_ACTIVE_MODELS
)
model_manager.model_manager.init_pingback()
interface = HttpInterface(
Expand Down
Empty file.
63 changes: 63 additions & 0 deletions inference/core/active_learning/accounting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from typing import List, Optional

from inference.core.entities.types import DatasetID, WorkspaceID
from inference.core.roboflow_api import (
get_roboflow_labeling_batches,
get_roboflow_labeling_jobs,
)


def image_can_be_submitted_to_batch(
    batch_name: str,
    workspace_id: WorkspaceID,
    dataset_id: DatasetID,
    max_batch_images: Optional[int],
    api_key: str,
) -> bool:
    """Decide whether the named labeling batch still has room for one more image.

    Args:
        batch_name: Name of the labeling batch to look up.
        workspace_id: Workspace identifier forwarded to the Roboflow API calls.
        dataset_id: Dataset identifier forwarded to the Roboflow API calls.
        max_batch_images: Upper bound on images per batch; ``None`` disables
            the limit.
        api_key: API key forwarded to the Roboflow API calls.

    Returns:
        ``True`` when no limit is configured, or when the limit is strictly
        greater than the number of images already accounted to the batch
        (images held by the batch plus images in labeling jobs sourced from it).
    """
    # No limit configured: accept without calling the API at all.
    if max_batch_images is None:
        return True
    batches_response = get_roboflow_labeling_batches(
        api_key=api_key,
        workspace_id=workspace_id,
        dataset_id=dataset_id,
    )
    batch = get_matching_labeling_batch(
        all_labeling_batches=batches_response["batches"],
        batch_name=batch_name,
    )
    # No batch with that name was found, so any positive limit leaves room.
    if batch is None:
        return max_batch_images > 0
    images_in_jobs = 0
    # Labeling jobs are fetched only when the batch reports at least one job.
    if batch["numJobs"] > 0:
        jobs_response = get_roboflow_labeling_jobs(
            api_key=api_key, workspace_id=workspace_id, dataset_id=dataset_id
        )
        images_in_jobs = get_images_in_labeling_jobs_of_specific_batch(
            all_labeling_jobs=jobs_response["jobs"],
            batch_id=batch["id"],
        )
    return max_batch_images > batch["images"] + images_in_jobs


def get_matching_labeling_batch(
    all_labeling_batches: List[dict],
    batch_name: str,
) -> Optional[dict]:
    """Return the first batch whose ``"name"`` equals ``batch_name``.

    Args:
        all_labeling_batches: Batch descriptors, each carrying a ``"name"`` key.
        batch_name: Name to search for.

    Returns:
        The first matching batch dictionary, or ``None`` when nothing matches.
    """
    return next(
        (
            candidate
            for candidate in all_labeling_batches
            if candidate["name"] == batch_name
        ),
        None,
    )


def get_images_in_labeling_jobs_of_specific_batch(
    all_labeling_jobs: List[dict],
    batch_id: str,
) -> int:
    """Sum ``"numImages"`` over the labeling jobs sourced from a given batch.

    A job is counted when ``batch_id`` is contained in the job's
    ``"sourceBatch"`` value (an ``in`` containment check, not equality).

    Args:
        all_labeling_jobs: Job descriptors with ``"sourceBatch"`` and
            ``"numImages"`` keys.
        batch_id: Identifier of the batch whose jobs should be counted.

    Returns:
        Total number of images across the matching jobs; ``0`` when none match.
    """
    # Filter first, then sum, so every job's "sourceBatch" is inspected
    # before any "numImages" is read.
    jobs_of_batch = [
        job for job in all_labeling_jobs if batch_id in job["sourceBatch"]
    ]
    return sum(job["numImages"] for job in jobs_of_batch)
26 changes: 26 additions & 0 deletions inference/core/active_learning/batching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from inference.core.active_learning.entities import (
ActiveLearningConfiguration,
BatchReCreationInterval,
)
from inference.core.active_learning.utils import (
generate_start_timestamp_for_this_month,
generate_start_timestamp_for_this_week,
generate_today_timestamp,
)

# Dispatch table: batch re-creation interval -> zero-argument callable whose
# result generate_batch_name() appends to the batch name prefix.
# BatchReCreationInterval.NEVER has no entry here; generate_batch_name()
# returns early for it before consulting this table.
RECREATION_INTERVAL2TIMESTAMP_GENERATOR = {
BatchReCreationInterval.DAILY: generate_today_timestamp,
BatchReCreationInterval.WEEKLY: generate_start_timestamp_for_this_week,
BatchReCreationInterval.MONTHLY: generate_start_timestamp_for_this_month,
}


def generate_batch_name(configuration: ActiveLearningConfiguration) -> str:
    """Build the labeling batch name for the given configuration.

    Args:
        configuration: Supplies ``batches_name_prefix`` and
            ``batch_recreation_interval``.

    Returns:
        The bare prefix when the interval is ``BatchReCreationInterval.NEVER``;
        otherwise ``"<prefix>_<timestamp>"``, with the timestamp produced by
        the generator registered for the configured interval.
    """
    prefix = configuration.batches_name_prefix
    interval = configuration.batch_recreation_interval
    # Batches that are never re-created keep a stable, suffix-free name.
    if interval is BatchReCreationInterval.NEVER:
        return prefix
    suffix = RECREATION_INTERVAL2TIMESTAMP_GENERATOR[interval]()
    return f"{prefix}_{suffix}"
Loading