From ec4a8ec87b1bf24071b28c550d194010f8a0f7f3 Mon Sep 17 00:00:00 2001 From: Tony Kipkemboi Date: Thu, 20 Feb 2025 18:06:00 -0500 Subject: [PATCH 1/3] docs: add Qdrant vector search tool documentation --- docs/mint.json | 1 + docs/tools/qdrantvectorsearchtool.mdx | 271 ++++++++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 docs/tools/qdrantvectorsearchtool.mdx diff --git a/docs/mint.json b/docs/mint.json index fb0dcfdf51..9b49648aaa 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -139,6 +139,7 @@ "tools/nl2sqltool", "tools/pdfsearchtool", "tools/pgsearchtool", + "tools/qdrantvectorsearchtool", "tools/scrapewebsitetool", "tools/seleniumscrapingtool", "tools/spidertool", diff --git a/docs/tools/qdrantvectorsearchtool.mdx b/docs/tools/qdrantvectorsearchtool.mdx new file mode 100644 index 0000000000..da3dcb1a21 --- /dev/null +++ b/docs/tools/qdrantvectorsearchtool.mdx @@ -0,0 +1,271 @@ +--- +title: 'Qdrant Vector Search Tool' +description: 'Semantic search capabilities for CrewAI agents using Qdrant vector database' +icon: magnifying-glass-plus +--- + +# `QdrantVectorSearchTool` + +The Qdrant Vector Search Tool enables semantic search capabilities in your CrewAI agents by leveraging [Qdrant](https://qdrant.tech/), a vector similarity search engine. This tool allows your agents to search through documents stored in a Qdrant collection using semantic similarity. 
+ +## Installation + +Install the required packages: + +```bash +uv pip install 'crewai[tools]' qdrant-client +``` + +## Basic Usage + +Here's a minimal example of how to use the tool: + +```python +from crewai import Agent +from crewai_tools import QdrantVectorSearchTool + +# Initialize the tool +qdrant_tool = QdrantVectorSearchTool( + qdrant_url="your_qdrant_url", + qdrant_api_key="your_qdrant_api_key", + collection_name="your_collection" +) + +# Create an agent that uses the tool +agent = Agent( + role="Research Assistant", + goal="Find relevant information in documents", + tools=[qdrant_tool] +) + +# The tool will automatically use OpenAI embeddings +# and return the 3 most relevant results with scores > 0.35 +``` + +## Complete Working Example + +Here's a complete example showing how to: +1. Extract text from a PDF +2. Generate embeddings using OpenAI +3. Store in Qdrant +4. Create a CrewAI agentic RAG workflow for semantic search + +```python +import os +import uuid +import pdfplumber +from openai import OpenAI +from dotenv import load_dotenv +from crewai import Agent, Task, Crew, Process, LLM +from crewai_tools import QdrantVectorSearchTool +from qdrant_client import QdrantClient +from qdrant_client.models import PointStruct, Distance, VectorParams + +# Load environment variables +load_dotenv() + +# Initialize OpenAI client +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +# Extract text from PDF +def extract_text_from_pdf(pdf_path): + text = [] + with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + page_text = page.extract_text() + if page_text: + text.append(page_text.strip()) + return text + +# Generate OpenAI embeddings +def get_openai_embedding(text): + response = client.embeddings.create( + input=text, + model="text-embedding-3-small" + ) + return response.data[0].embedding + +# Store text and embeddings in Qdrant +def load_pdf_to_qdrant(pdf_path, qdrant, collection_name): + # Extract text from PDF + text_chunks = 
extract_text_from_pdf(pdf_path) + + # Create Qdrant collection + if qdrant.collection_exists(collection_name): + qdrant.delete_collection(collection_name) + qdrant.create_collection( + collection_name=collection_name, + vectors_config=VectorParams(size=1536, distance=Distance.COSINE) + ) + + # Store embeddings + points = [] + for chunk in text_chunks: + embedding = get_openai_embedding(chunk) + points.append(PointStruct( + id=str(uuid.uuid4()), + vector=embedding, + payload={"text": chunk} + )) + qdrant.upsert(collection_name=collection_name, points=points) + +# Initialize Qdrant client and load data +qdrant = QdrantClient( + url=os.getenv("QDRANT_URL"), + api_key=os.getenv("QDRANT_API_KEY") +) +collection_name = "example_collection" +pdf_path = "path/to/your/document.pdf" +load_pdf_to_qdrant(pdf_path, qdrant, collection_name) + +# Initialize Qdrant search tool +qdrant_tool = QdrantVectorSearchTool( + qdrant_url=os.getenv("QDRANT_URL"), + qdrant_api_key=os.getenv("QDRANT_API_KEY"), + collection_name=collection_name, + limit=3, + score_threshold=0.35 +) + +# Create CrewAI agents +search_agent = Agent( + role="Senior Semantic Search Agent", + goal="Find and analyze documents based on semantic search", + backstory="""You are an expert research assistant who can find relevant + information using semantic search in a Qdrant database.""", + tools=[qdrant_tool], + verbose=True +) + +answer_agent = Agent( + role="Senior Answer Assistant", + goal="Generate answers to questions based on the context provided", + backstory="""You are an expert answer assistant who can generate + answers to questions based on the context provided.""", + tools=[qdrant_tool], + verbose=True +) + +# Define tasks +search_task = Task( + description="""Search for relevant documents about the {query}. 
+ Your final answer should include: + - The relevant information found + - The similarity scores of the results + - The metadata of the relevant documents""", + expected_output="The relevant passages with their similarity scores and metadata", agent=search_agent +) + +answer_task = Task( + description="""Given the context and metadata of relevant documents, + generate a final answer based on the context.""", + expected_output="A final answer grounded in the retrieved context", agent=answer_agent +) + +# Run CrewAI workflow +crew = Crew( + agents=[search_agent, answer_agent], + tasks=[search_task, answer_task], + process=Process.sequential, + verbose=True +) + +result = crew.kickoff( + inputs={"query": "What is the role of X in the document?"} +) +print(result) +``` + +## Tool Parameters + +### Required Parameters +- `qdrant_url` (str): The URL of your Qdrant server +- `qdrant_api_key` (str): API key for authentication with Qdrant +- `collection_name` (str): Name of the Qdrant collection to search + +### Optional Parameters +- `limit` (int): Maximum number of results to return (default: 3) +- `score_threshold` (float): Minimum similarity score threshold (default: 0.35) +- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization + +## Search Parameters + +The tool accepts these parameters in its schema: +- `query` (str): The search query to find similar documents +- `filter_by` (str, optional): Metadata field to filter on +- `filter_value` (str, optional): Value to filter by + +## Return Format + +The tool returns results in JSON format: + +```json +[ + { + "metadata": { + // Any metadata stored with the document + }, + "context": "The actual text content of the document", + "distance": 0.95 // Similarity score + } +] +``` + +## Default Embedding + +By default, the tool uses OpenAI's `text-embedding-3-small` model for vectorization. This requires: +- OpenAI API key set in environment: `OPENAI_API_KEY` + +## Custom Embeddings + +Instead of using the default embedding model, you might want to use your own embedding function in cases where you: + +1. 
Want to use a different embedding model (e.g., Cohere, HuggingFace, Ollama models) +2. Need to reduce costs by using open-source embedding models +3. Have specific requirements for vector dimensions or embedding quality +4. Want to use domain-specific embeddings (e.g., for medical or legal text) + +Here's an example using a HuggingFace model: + +```python +from transformers import AutoTokenizer, AutoModel +import torch + +# Load model and tokenizer +tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2') +model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2') + +def custom_embeddings(text: str) -> list[float]: + # Tokenize and get model outputs + inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True) + outputs = model(**inputs) + + # Use mean pooling to get text embedding + embeddings = outputs.last_hidden_state.mean(dim=1) + + # Convert to list of floats and return + return embeddings[0].tolist() + +# Use custom embeddings with the tool +tool = QdrantVectorSearchTool( + qdrant_url="your_url", + qdrant_api_key="your_key", + collection_name="your_collection", + custom_embedding_fn=custom_embeddings # Pass your custom function +) +``` + +## Error Handling + +The tool handles these specific errors: +- Raises ImportError if `qdrant-client` is not installed (with option to auto-install) +- Raises ValueError if `QDRANT_URL` is not set +- Prompts to install `qdrant-client` if missing using `uv add qdrant-client` + +## Environment Variables + +Required environment variables: +```bash +export QDRANT_URL="your_qdrant_url" # If not provided in constructor +export QDRANT_API_KEY="your_api_key" # If not provided in constructor +export OPENAI_API_KEY="your_openai_key" # If using default embeddings \ No newline at end of file From 5a52d0fd01e3093301d975ca50ad454a0bc58e6f Mon Sep 17 00:00:00 2001 From: Tony Kipkemboi Date: Fri, 21 Feb 2025 17:57:21 -0500 Subject: [PATCH 2/3] Update installation docs to use uv 
and improve quickstart guide --- docs/installation.mdx | 212 ++++++++++++++++--------------------- docs/quickstart.mdx | 235 ++++++------------------------------------ 2 files changed, 121 insertions(+), 326 deletions(-) diff --git a/docs/installation.mdx b/docs/installation.mdx index 8abba152a7..36cb6584cd 100644 --- a/docs/installation.mdx +++ b/docs/installation.mdx @@ -15,162 +15,124 @@ icon: wrench If you need to update Python, visit [python.org/downloads](https://python.org/downloads) -# Setting Up Your Environment +CrewAI uses the `uv` as its dependency management and package handling tool. It simplifies project setup and execution, offering a seamless experience. -Before installing CrewAI, it's recommended to set up a virtual environment. This helps isolate your project dependencies and avoid conflicts. +If you haven't installed `uv` yet, follow step `1` to quickly get it set up on your system, else you can skip to step `2`. - - Choose your preferred method to create a virtual environment: + + - **On macOS/Linux:** - **Using venv (Python's built-in tool):** - ```shell Terminal - python3 -m venv .venv - ``` - - **Using conda:** - ```shell Terminal - conda create -n crewai-env python=3.12 - ``` - - - - Activate your virtual environment based on your platform: + Use `curl` to download the script and execute it with `sh`: - **On macOS/Linux (venv):** - ```shell Terminal - source .venv/bin/activate + ```shell + curl -LsSf https://astral.sh/uv/install.sh | sh ``` + If your system doesn't have `curl`, you can use `wget`: - **On Windows (venv):** - ```shell Terminal - .venv\Scripts\activate + ```shell + wget -qO- https://astral.sh/uv/install.sh | sh ``` - **Using conda (all platforms):** - ```shell Terminal - conda activate crewai-env - ``` - - + - **On Windows:** -# Installing CrewAI + Use `irm` to download the script and `iex` to execute it: -Now let's get you set up! 
🚀 - - - - Install CrewAI with all recommended tools using either method: - ```shell Terminal - pip install 'crewai[tools]' + ```shell + powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" ``` - or - ```shell Terminal - pip install crewai crewai-tools - ``` - - - Both methods install the core package and additional tools needed for most use cases. - + If you run into any issues, refer to [UV's installation guide](https://docs.astral.sh/uv/getting-started/installation/) for more information. - - If you have an older version of CrewAI installed, you can upgrade it: - ```shell Terminal - pip install --upgrade crewai crewai-tools + + - Run the following command to install CrewAI with all recommended tools: + ```shell + uv tool install 'crewai[tools]' ``` - - - If you see a Poetry-related warning, you'll need to migrate to our new dependency manager: - ```shell Terminal - crewai update + + If you encounter the `PATH` warning, run this command to update your shell: + ```shell + uv tool update-shell ``` - This will update your project to use [UV](https://github.com/astral-sh/uv), our new faster dependency manager. - - - - Skip this step if you're doing a fresh installation. - - + - - Check your installed versions: - ```shell Terminal - pip freeze | grep crewai + - To verify that `crewai` and `crewai-tools` are installed, run: + ```shell + uv pip freeze | grep crewai ``` - - You should see something like: - ```markdown Output + - You should see something like: + ```markdown crewai==X.X.X crewai-tools==X.X.X ``` - Installation successful! You're ready to create your first crew. + Installation successful! You're ready to create your first crew! 🎉 -# Creating a New Project +# Creating a CrewAI Project - - We recommend using the YAML Template scaffolding for a structured approach to defining agents and tasks. - +We recommend using the `YAML` template scaffolding for a structured approach to defining agents and tasks. 
Here's how to get started: - - Run the CrewAI CLI command: - ```shell Terminal - crewai create crew - ``` - - This creates a new project with the following structure: - - ``` - my_project/ - ├── .gitignore - ├── pyproject.toml - ├── README.md - ├── .env - └── src/ - └── my_project/ - ├── __init__.py - ├── main.py - ├── crew.py - ├── tools/ - │ ├── custom_tool.py - │ └── __init__.py - └── config/ - ├── agents.yaml - └── tasks.yaml - ``` - + + - Run the CrewAI CLI command: + ```shell + crewai create crew + ``` + + - This creates a new project with the following structure: + + ``` + my_project/ + ├── .gitignore + ├── knowledge/ + ├── pyproject.toml + ├── README.md + ├── .env + └── src/ + └── my_project/ + ├── __init__.py + ├── main.py + ├── crew.py + ├── tools/ + │ ├── custom_tool.py + │ └── __init__.py + └── config/ + ├── agents.yaml + └── tasks.yaml + ``` + - - You can install additional tools using UV: - ```shell Terminal - uv add - ``` - - - UV is our preferred package manager as it's significantly faster than pip and provides better dependency resolution. - + + - Your project will contain these essential files: + | File | Purpose | + | --- | --- | + | `agents.yaml` | Define your AI agents and their roles | + | `tasks.yaml` | Set up agent tasks and workflows | + | `.env` | Store API keys and environment variables | + | `main.py` | Project entry point and execution flow | + | `crew.py` | Crew orchestration and coordination | + | `tools/` | Directory for custom agent tools | + | `knowledge/` | Directory for knowledge base | + + - Start by editing `agents.yaml` and `tasks.yaml` to define your crew's behavior. + - Keep sensitive information like API keys in `.env`. 
- - Your project will contain these essential files: - - | File | Purpose | - | --- | --- | - | `agents.yaml` | Define your AI agents and their roles | - | `tasks.yaml` | Set up agent tasks and workflows | - | `.env` | Store API keys and environment variables | - | `main.py` | Project entry point and execution flow | - | `crew.py` | Crew orchestration and coordination | - | `tools/` | Directory for custom agent tools | - - - Start by editing `agents.yaml` and `tasks.yaml` to define your crew's behavior. - Keep sensitive information like API keys in `.env`. - + + - Before you run your crew, make sure to run: + ```bash + crewai install + ``` + - If you need to install additional packages, use: + ```shell + uv add + ``` + - To run your crew, execute the following command in the root of your project: + ```bash + crewai run + ``` diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 9fb8f783ac..df57f756f4 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -8,10 +8,10 @@ icon: rocket Let's create a simple crew that will help us `research` and `report` on the `latest AI developments` for a given topic or subject. -Before we proceed, make sure you have `crewai` and `crewai-tools` installed. +Before we proceed, make sure you have finished installing CrewAI. If you haven't installed them yet, you can do so by following the [installation guide](/installation). -Follow the steps below to get crewing! 🚣‍♂️ +Follow the steps below to get Crewing! 🚣‍♂️ @@ -23,6 +23,13 @@ Follow the steps below to get crewing! 🚣‍♂️ ``` + + + ```shell Terminal + cd latest-ai-development + ``` + + You can also modify the agents as needed to fit your use case or copy and paste as is to your project. @@ -172,21 +179,26 @@ Follow the steps below to get crewing! 
🚣‍♂️ - A [Serper.dev](https://serper.dev/) API key: `SERPER_API_KEY=YOUR_KEY_HERE` - Lock the dependencies and install them by using the CLI command but first, navigate to your project directory: - - ```shell Terminal - cd latest-ai-development - crewai install - ``` - + - Lock the dependencies and install them by using the CLI command: + + ```shell Terminal + crewai install + ``` + + - If you have additional packages that you want to install, you can do so by running: + + ```shell Terminal + uv add + ``` + - To run your crew, execute the following command in the root of your project: - - ```bash Terminal - crewai run - ``` - + - To run your crew, execute the following command in the root of your project: + + ```bash Terminal + crewai run + ``` + You should see the output in the console and the `report.md` file should be created in the root of your project with the final report. @@ -258,6 +270,12 @@ Follow the steps below to get crewing! 🚣‍♂️ + +Congratulations! + +You have successfully set up your crew project and are ready to start building your own agentic workflows! + + ### Note on Consistency in Naming The names you use in your YAML files (`agents.yaml` and `tasks.yaml`) should match the method names in your Python code. @@ -297,194 +315,9 @@ email_summarizer_task: - research_task ``` -Use the annotations to properly reference the agent and task in the `crew.py` file. - -### Annotations include: - -Here are examples of how to use each annotation in your CrewAI project, and when you should use them: - -#### @agent -Used to define an agent in your crew. 
Use this when: -- You need to create a specialized AI agent with a specific role -- You want the agent to be automatically collected and managed by the crew -- You need to reuse the same agent configuration across multiple tasks - -```python -@agent -def research_agent(self) -> Agent: - return Agent( - role="Research Analyst", - goal="Conduct thorough research on given topics", - backstory="Expert researcher with years of experience in data analysis", - tools=[SerperDevTool()], - verbose=True - ) -``` - -#### @task -Used to define a task that can be executed by agents. Use this when: -- You need to define a specific piece of work for an agent -- You want tasks to be automatically sequenced and managed -- You need to establish dependencies between different tasks - -```python -@task -def research_task(self) -> Task: - return Task( - description="Research the latest developments in AI technology", - expected_output="A comprehensive report on AI advancements", - agent=self.research_agent(), - output_file="output/research.md" - ) -``` - -#### @crew -Used to define your crew configuration. Use this when: -- You want to automatically collect all @agent and @task definitions -- You need to specify how tasks should be processed (sequential or hierarchical) -- You want to set up crew-wide configurations - -```python -@crew -def research_crew(self) -> Crew: - return Crew( - agents=self.agents, # Automatically collected from @agent methods - tasks=self.tasks, # Automatically collected from @task methods - process=Process.sequential, - verbose=True - ) -``` - -#### @tool -Used to create custom tools for your agents. Use this when: -- You need to give agents specific capabilities (like web search, data analysis) -- You want to encapsulate external API calls or complex operations -- You need to share functionality across multiple agents - -```python -@tool -def web_search_tool(query: str, max_results: int = 5) -> list[str]: - """ - Search the web for information. 
- - Args: - query: The search query - max_results: Maximum number of results to return - - Returns: - List of search results - """ - # Implement your search logic here - return [f"Result {i} for: {query}" for i in range(max_results)] -``` - -#### @before_kickoff -Used to execute logic before the crew starts. Use this when: -- You need to validate or preprocess input data -- You want to set up resources or configurations before execution -- You need to perform any initialization logic - -```python -@before_kickoff -def validate_inputs(self, inputs: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - """Validate and preprocess inputs before the crew starts.""" - if inputs is None: - return None - - if 'topic' not in inputs: - raise ValueError("Topic is required") - - # Add additional context - inputs['timestamp'] = datetime.now().isoformat() - inputs['topic'] = inputs['topic'].strip().lower() - return inputs -``` - -#### @after_kickoff -Used to process results after the crew completes. Use this when: -- You need to format or transform the final output -- You want to perform cleanup operations -- You need to save or log the results in a specific way - -```python -@after_kickoff -def process_results(self, result: CrewOutput) -> CrewOutput: - """Process and format the results after the crew completes.""" - result.raw = result.raw.strip() - result.raw = f""" - # Research Results - Generated on: {datetime.now().isoformat()} - - {result.raw} - """ - return result -``` - -#### @callback -Used to handle events during crew execution. 
Use this when: -- You need to monitor task progress -- You want to log intermediate results -- You need to implement custom progress tracking or metrics - -```python -@callback -def log_task_completion(self, task: Task, output: str): - """Log task completion details for monitoring.""" - print(f"Task '{task.description}' completed") - print(f"Output length: {len(output)} characters") - print(f"Agent used: {task.agent.role}") - print("-" * 50) -``` - -#### @cache_handler -Used to implement custom caching for task results. Use this when: -- You want to avoid redundant expensive operations -- You need to implement custom cache storage or expiration logic -- You want to persist results between runs - -```python -@cache_handler -def custom_cache(self, key: str) -> Optional[str]: - """Custom cache implementation for storing task results.""" - cache_file = f"cache/{key}.json" - - if os.path.exists(cache_file): - with open(cache_file, 'r') as f: - data = json.load(f) - # Check if cache is still valid (e.g., not expired) - if datetime.fromisoformat(data['timestamp']) > datetime.now() - timedelta(days=1): - return data['result'] - return None -``` - - -These decorators are part of the CrewAI framework and help organize your crew's structure by automatically collecting agents, tasks, and handling various lifecycle events. -They should be used within a class decorated with `@CrewBase`. - - -### Replay Tasks from Latest Crew Kickoff - -CrewAI now includes a replay feature that allows you to list the tasks from the last run and replay from a specific one. To use this feature, run. - -```shell -crewai replay -``` - -Replace `` with the ID of the task you want to replay. - -### Reset Crew Memory - -If you need to reset the memory of your crew before running it again, you can do so by calling the reset memory feature: - -```shell -crewai reset-memories --all -``` - -This will clear the crew's memory, allowing for a fresh start. 
- ## Deploying Your Project -The easiest way to deploy your crew is through CrewAI Enterprise, where you can deploy your crew in a few clicks. +The easiest way to deploy your crew is through [CrewAI Enterprise](http://app.crewai.com), where you can deploy your crew in a few clicks. Date: Sun, 2 Mar 2025 10:44:46 -0500 Subject: [PATCH 3/3] docs: improve installation instructions and add structured outputs video --- docs/concepts/tasks.mdx | 13 +++++++++++++ docs/installation.mdx | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/docs/concepts/tasks.mdx b/docs/concepts/tasks.mdx index 120f5d547b..1cd4034826 100644 --- a/docs/concepts/tasks.mdx +++ b/docs/concepts/tasks.mdx @@ -876,6 +876,19 @@ save_output_task = Task( #... ``` +Check out the video below to see how to use structured outputs in CrewAI: + + + ## Conclusion Tasks are the driving force behind the actions of agents in CrewAI. diff --git a/docs/installation.mdx b/docs/installation.mdx index 36cb6584cd..f051cf13c7 100644 --- a/docs/installation.mdx +++ b/docs/installation.mdx @@ -17,7 +17,7 @@ icon: wrench CrewAI uses the `uv` as its dependency management and package handling tool. It simplifies project setup and execution, offering a seamless experience. -If you haven't installed `uv` yet, follow step `1` to quickly get it set up on your system, else you can skip to step `2`. +If you haven't installed `uv` yet, follow **step 1** to quickly get it set up on your system, else you can skip to **step 2**. 
@@ -45,25 +45,25 @@ If you haven't installed `uv` yet, follow step `1` to quickly get it set up on y - - Run the following command to install CrewAI with all recommended tools: + - Run the following command to install the `crewai` CLI: ```shell - uv tool install 'crewai[tools]' + uv tool install crewai ``` - If you encounter the `PATH` warning, run this command to update your shell: + If you encounter a `PATH` warning, run this command to update your shell: ```shell uv tool update-shell ``` - - To verify that `crewai` and `crewai-tools` are installed, run: + - To verify that `crewai` is installed, run: ```shell - uv pip freeze | grep crewai + uv tool list ``` - You should see something like: ```markdown - crewai==X.X.X - crewai-tools==X.X.X + crewai v0.102.0 + - crewai ``` Installation successful! You're ready to create your first crew! 🎉 @@ -75,9 +75,9 @@ We recommend using the `YAML` template scaffolding for a structured approach to - - Run the CrewAI CLI command: + - Run the `crewai` CLI command: ```shell - crewai create crew + crewai create crew <your_project_name> ``` - This creates a new project with the following structure: