Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement set_knowledge method in BaseAgent (fixes #2385) #2386

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 64 additions & 16 deletions src/crewai/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,25 +134,73 @@ def _setup_agent_executor(self):
self.cache_handler = CacheHandler()
self.set_cache_handler(self.cache_handler)

def set_knowledge(
    self,
    knowledge_sources: Optional[List[BaseKnowledgeSource]] = None,
    embedder_config: Optional[Dict[str, Any]] = None,
    crew_embedder: Optional[Dict[str, Any]] = None,
) -> None:
    """Attach knowledge sources to the agent and build its ``Knowledge`` object.

    Args:
        knowledge_sources: Sources to store on the agent. ``None`` leaves
            ``self.knowledge_sources`` as it is.
        embedder_config: Embedder configuration. It is adopted only when it
            is truthy and the agent has no embedder of its own
            (``self.embedder is None``); an embedder already set on the
            agent is never overwritten.
        crew_embedder: Deprecated keyword alias for ``embedder_config``,
            kept so callers using the previous
            ``set_knowledge(crew_embedder=...)`` form keep working. Ignored
            when ``embedder_config`` is given.

    Raises:
        TypeError: If ``knowledge_sources`` is neither a list nor ``None``,
            or if a ``TypeError`` occurs while building the ``Knowledge``
            object. The message is prefixed with
            ``"Invalid Knowledge Configuration Type: "``.
        ValueError: If any item of ``knowledge_sources`` is not a
            ``BaseKnowledgeSource``, or if any other exception occurs while
            building the ``Knowledge`` object (the original exception is
            chained as ``__cause__``).

    Example:
        ```python
        source = StringKnowledgeSource(content="The capital of France is Paris.")

        agent.set_knowledge(
            knowledge_sources=[source],
            embedder_config={"provider": "openai", "model": "text-embedding-3-small"},
        )
        ```
    """
    # Backward compatibility: honour the old keyword when the new one is absent.
    if embedder_config is None:
        embedder_config = crew_embedder or None

    try:
        # Validate everything before touching the agent, so a rejected call
        # does not leave the embedder updated but the sources unchanged.
        if knowledge_sources is not None:
            if not isinstance(knowledge_sources, list):
                raise TypeError("knowledge_sources must be a list or None")

            if not all(isinstance(k, BaseKnowledgeSource) for k in knowledge_sources):
                raise ValueError("All knowledge sources must be instances of BaseKnowledgeSource")

        # The agent's own embedder takes precedence over the supplied one.
        if embedder_config and self.embedder is None:
            self.embedder = embedder_config

        if knowledge_sources is not None:
            self.knowledge_sources = knowledge_sources

        # Create the knowledge object only when there is something to index.
        if self.knowledge_sources:
            full_pattern = re.compile(r"[^a-zA-Z0-9\-_\r\n]|(\.\.)")
            # NOTE(review): id(self) differs between processes, so this name is
            # not stable across runs — confirm that persisted collections are
            # not expected to be reused.
            knowledge_agent_name = f"{re.sub(full_pattern, '_', self.role)}_{id(self)}"
            self.knowledge = Knowledge(
                sources=self.knowledge_sources,
                embedder=self.embedder,
                collection_name=knowledge_agent_name,
                storage=self.knowledge_storage or None,
            )
    except TypeError as e:
        raise TypeError(f"Invalid Knowledge Configuration Type: {str(e)}") from e
    except ValueError as e:
        raise ValueError(f"Invalid Knowledge Configuration Value: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Error setting knowledge: {str(e)}") from e

def execute_task(
self,
Expand Down
79 changes: 76 additions & 3 deletions src/crewai/agents/agent_builder/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABC, abstractmethod
from copy import copy as shallow_copy
from hashlib import md5
from typing import Any, Dict, List, Optional, TypeVar
from typing import Any, Dict, List, Optional, TypeVar, Union, cast

from pydantic import (
UUID4,
Expand Down Expand Up @@ -148,6 +148,10 @@ class BaseAgent(ABC, BaseModel):
default=None,
description="Custom knowledge storage for the agent.",
)
embedder_config: Optional[Dict[str, Any]] = Field(
default=None,
description="Configuration for embedding generation.",
)
security_config: SecurityConfig = Field(
default_factory=SecurityConfig,
description="Security configuration for the agent, including fingerprinting.",
Expand Down Expand Up @@ -362,5 +366,74 @@ def set_rpm_controller(self, rpm_controller: RPMController) -> None:
self._rpm_controller = rpm_controller
self.create_agent_executor()

def set_knowledge(
    self,
    knowledge_sources: Optional[List[BaseKnowledgeSource]] = None,
    embedder_config: Optional[Dict[str, Any]] = None,
    crew_embedder: Optional[Dict[str, Any]] = None,
) -> None:
    """Store knowledge sources and embedder settings, then build ``self.knowledge``.

    Both arguments are validated before anything is written to the agent, so
    a rejected call leaves ``knowledge_sources`` and ``embedder_config``
    exactly as they were.

    Args:
        knowledge_sources: Sources to store on the agent. ``None`` leaves
            ``self.knowledge_sources`` as it is.
        embedder_config: Embedder configuration; must be a dict containing a
            ``"provider"`` key. ``None`` leaves ``self.embedder_config`` as
            it is.
        crew_embedder: Deprecated keyword alias for ``embedder_config``,
            kept so callers using the previous
            ``set_knowledge(crew_embedder=...)`` form keep working. Ignored
            when ``embedder_config`` is given or when it is empty.

    Raises:
        TypeError: If ``knowledge_sources`` is neither a list nor ``None``,
            if ``embedder_config`` is neither a dict nor ``None``, or if a
            ``TypeError`` occurs while building the ``Knowledge`` object.
            The message is prefixed with
            ``"Invalid Knowledge Configuration Type: "``.
        ValueError: If any item of ``knowledge_sources`` is not a
            ``BaseKnowledgeSource``, if ``embedder_config`` has no
            ``"provider"`` key, or if any other exception occurs while
            building the ``Knowledge`` object (the original exception is
            chained as ``__cause__``).

    Example:
        ```python
        source = StringKnowledgeSource(content="The capital of France is Paris.")

        agent.set_knowledge(
            knowledge_sources=[source],
            embedder_config={"provider": "openai", "model": "text-embedding-3-small"},
        )
        ```
    """
    # Imported locally so this method does not depend on a module-level import.
    import re

    # Backward compatibility: honour the old keyword when the new one is absent.
    if embedder_config is None:
        embedder_config = crew_embedder or None

    try:
        # --- validate first -------------------------------------------------
        if knowledge_sources is not None:
            if not isinstance(knowledge_sources, list):
                raise TypeError("knowledge_sources must be a list or None")

            if not all(isinstance(k, BaseKnowledgeSource) for k in knowledge_sources):
                raise ValueError("All knowledge sources must be instances of BaseKnowledgeSource")

        if embedder_config is not None:
            if not isinstance(embedder_config, dict):
                raise TypeError("embedder_config must be a dictionary or None")

            if "provider" not in embedder_config:
                raise ValueError("embedder_config must contain a 'provider' key")

        # --- then commit ----------------------------------------------------
        if knowledge_sources is not None:
            self.knowledge_sources = knowledge_sources

        if embedder_config is not None:
            self.embedder_config = embedder_config

        # Create the knowledge object only when there is something to index.
        if self.knowledge_sources:
            # Replace every character outside [a-zA-Z0-9-_] (and "..") so the
            # role cannot inject separators into the collection name.
            safe_role = re.sub(r"[^a-zA-Z0-9\-_\r\n]|(\.\.)", "_", self.role)
            # NOTE(review): id(self) differs between processes, so this name is
            # not stable across runs — confirm that persisted collections are
            # not expected to be reused.
            knowledge_agent_name = f"{safe_role}_{id(self)}"
            # NOTE(review): other call sites pass this setting to Knowledge as
            # `embedder=`; confirm that Knowledge accepts `embedder_config=`
            # rather than silently ignoring it.
            self.knowledge = Knowledge(
                sources=self.knowledge_sources,
                embedder_config=self.embedder_config,
                collection_name=knowledge_agent_name,
            )
    except TypeError as e:
        raise TypeError(f"Invalid Knowledge Configuration Type: {str(e)}") from e
    except ValueError as e:
        raise ValueError(f"Invalid Knowledge Configuration Value: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Error setting knowledge: {str(e)}") from e
2 changes: 1 addition & 1 deletion src/crewai/crew.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ def kickoff(
agent.i18n = i18n
# type: ignore[attr-defined] # Argument 1 to "_interpolate_inputs" of "Crew" has incompatible type "dict[str, Any] | None"; expected "dict[str, Any]"
agent.crew = self # type: ignore[attr-defined]
agent.set_knowledge(crew_embedder=self.embedder)
agent.set_knowledge(embedder_config=self.embedder)
# TODO: Create an AgentFunctionCalling protocol for future refactoring
if not agent.function_calling_llm: # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
agent.function_calling_llm = self.function_calling_llm # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
Expand Down
70 changes: 70 additions & 0 deletions tests/agent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1586,6 +1586,76 @@ def test_agent_execute_task_with_ollama():
assert "AI" in result or "artificial intelligence" in result.lower()


@pytest.mark.vcr(filter_headers=["authorization"])
def test_base_agent_set_knowledge():
    """Exercise BaseAgent.set_knowledge: source storage, Knowledge creation, validation."""
    from crewai.agents.agent_builder.base_agent import BaseAgent
    from crewai.knowledge.knowledge import Knowledge

    class TestAgent(BaseAgent):
        """Smallest concrete BaseAgent: every abstract hook is a trivial stub."""

        def execute_task(self, task, context=None, tools=None):
            return "Test execution"

        def create_agent_executor(self, tools=None):
            pass

        def _parse_tools(self, tools):
            return tools

        def get_delegation_tools(self, agents):
            return []

        def get_output_converter(self, llm, text, model, instructions):
            return None

    # One string-backed knowledge source is enough for every scenario below.
    source = StringKnowledgeSource(content="The capital of France is Paris.")

    stub_agent = TestAgent(
        role="Test Agent",
        goal="Test Goal",
        backstory="Test Backstory",
    )

    # Knowledge is replaced with a mock so that no embedding request is issued.
    with patch("crewai.agents.agent_builder.base_agent.Knowledge") as knowledge_cls:
        knowledge_cls.return_value.sources = [source]

        # Scenario 1: sources only.
        stub_agent.set_knowledge(knowledge_sources=[source])

        assert stub_agent.knowledge_sources == [source]
        assert stub_agent.knowledge is not None
        assert knowledge_cls.called
        # The collection name is the sanitised role followed by a unique suffix.
        first_call_kwargs = knowledge_cls.call_args[1]
        assert first_call_kwargs["collection_name"].startswith("Test_Agent_")

        # Scenario 2: sources together with an embedder configuration.
        embedder_settings = {"provider": "openai", "model": "text-embedding-3-small"}
        stub_agent.set_knowledge(
            knowledge_sources=[source],
            embedder_config=embedder_settings,
        )

        assert stub_agent.embedder_config == embedder_settings
        second_call_kwargs = knowledge_cls.call_args[1]
        assert second_call_kwargs["embedder_config"] == embedder_settings

        # Scenario 3: an item that is not a knowledge source must be rejected
        # by set_knowledge's own validation, not by the Knowledge class.
        with pytest.raises(ValueError):
            stub_agent.set_knowledge(knowledge_sources=["invalid source"])


@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_with_knowledge_sources():
# Create a knowledge source with some content
Expand Down
77 changes: 77 additions & 0 deletions tests/cassettes/test_base_agent_set_knowledge.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
interactions:
- request:
body: '{"input": ["The capital of France is Paris."], "model": "text-embedding-3-small",
"encoding_format": "base64"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '110'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.61.0
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.61.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/embeddings
response:
content: "{\n \"error\": {\n \"message\": \"Incorrect API key provided:
sk-proj-********************************************************************************************************************************************************sLcA.
You can find your API key at https://platform.openai.com/account/api-keys.\",\n
\ \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\":
\"invalid_api_key\"\n }\n}\n"
headers:
CF-RAY:
- 9219d2095edc680f-SEA
Connection:
- keep-alive
Content-Length:
- '414'
Content-Type:
- application/json; charset=utf-8
Date:
- Mon, 17 Mar 2025 04:41:52 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=VL2xCt_SZgziztqb6CFL0smPGPhRpbELZKUdSHPmfLQ-1742186512-1.0.1.1-UFayToBt3jFkEkjQwZJ7A4KLy0.uZK9sqwbNqpMQ75dMEz2hycNU3NwtXor0NmM7k7XsdxtcXPfv.JcVjYatku_yE3I6qMEMGsgoog.guDU;
path=/; expires=Mon, 17-Mar-25 05:11:52 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=O4ymzjmuwsEutsmbHpzKDz4uyyZNA1tSUX0M.FNCjro-1742186512991-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
X-Content-Type-Options:
- nosniff
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
vary:
- Origin
x-request-id:
- req_62700144d22a58e93c0464aa643af3ec
http_version: HTTP/1.1
status_code: 401
version: 1